LLVM 23.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
43/// NOTE: The TargetMachine owns TLOF.
47
48// Define the virtual destructor out-of-line for build efficiency.
50
51const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
52 return nullptr;
53}
54
58
59/// Check whether a given call node is in tail position within its function. If
60/// so, it sets Chain to the input chain of the tail call.
// NOTE(review): the opening signature line(s) are missing from this extraction
// (original lines 61/63 dropped) -- only the trailing parameter is visible.
62 SDValue &Chain) const {
64
65 // First, check if tail calls have been disabled in this function.
66 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
67 return false;
68
69 // Conservatively require the attributes of the call to match those of
70 // the return. Ignore following attributes because they don't affect the
71 // call sequence.
72 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs())
73 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
74 Attribute::DereferenceableOrNull, Attribute::NoAlias,
75 Attribute::NonNull, Attribute::NoUndef,
76 Attribute::Range, Attribute::NoFPClass})
77 CallerAttrs.removeAttribute(Attr);
78
// Any return attribute remaining after stripping the ignorable set above
// represents a call/return mismatch, so tail-call folding is unsafe.
79 if (CallerAttrs.hasAttributes())
80 return false;
81
82 // It's not safe to eliminate the sign / zero extension of the return value.
// NOTE(review): as written this check is unreachable -- if ZExt/SExt were
// present, hasAttributes() above would already have returned false.
// Confirm intent against upstream before relying on it.
83 if (CallerAttrs.contains(Attribute::ZExt) ||
84 CallerAttrs.contains(Attribute::SExt))
85 return false;
86
87 // Check if the only use is a function return node.
88 return isUsedByReturnOnly(Node, Chain);
89}
90
// Verify that every outgoing argument assigned to a callee-saved register is
// exactly the caller's incoming live-in value for that same register (a
// CopyFromReg of the function live-in), so a tail call will not clobber CSRs.
// NOTE(review): the function name/signature line is missing from this
// extraction (original line 91 dropped).
92 const uint32_t *CallerPreservedMask,
93 const SmallVectorImpl<CCValAssign> &ArgLocs,
94 const SmallVectorImpl<SDValue> &OutVals) const {
95 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
96 const CCValAssign &ArgLoc = ArgLocs[I];
97 if (!ArgLoc.isRegLoc())
98 continue;
99 MCRegister Reg = ArgLoc.getLocReg();
100 // Only look at callee saved registers.
101 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
102 continue;
103 // Check that we pass the value used for the caller.
104 // (We look for a CopyFromReg reading a virtual register that is used
105 // for the function live-in value of register Reg)
106 SDValue Value = OutVals[I];
// Look through a zero-extension assertion before matching the CopyFromReg.
107 if (Value->getOpcode() == ISD::AssertZext)
108 Value = Value.getOperand(0);
109 if (Value->getOpcode() != ISD::CopyFromReg)
110 return false;
111 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
112 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
113 return false;
114 }
115 return true;
116}
117
118/// Set CallLoweringInfo attribute flags based on a call instruction
119/// and called function attributes.
// NOTE(review): the signature line is missing here (original line 120
// dropped); only the trailing ArgIdx parameter is visible below.
121 unsigned ArgIdx) {
// Mirror each relevant parameter attribute from the call site into the
// corresponding boolean flag on this arg-list entry.
122 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
123 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
124 IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
125 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
126 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
127 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
128 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
129 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
130 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
131 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
132 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
133 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
134 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
135 Alignment = Call->getParamStackAlign(ArgIdx);
136 IndirectType = nullptr;
// NOTE(review): the assert's condition line is missing (original line 137
// dropped); only its message string survives.
138 "multiple ABI attributes?");
// For indirect-passing ABIs, record the pointee type implied by the
// attribute; at most one of these is expected to apply (see assert above).
139 if (IsByVal) {
140 IndirectType = Call->getParamByValType(ArgIdx);
// Stack alignment takes precedence; fall back to the parameter alignment.
141 if (!Alignment)
142 Alignment = Call->getParamAlign(ArgIdx);
143 }
144 if (IsPreallocated)
145 IndirectType = Call->getParamPreallocatedType(ArgIdx);
146 if (IsInAlloca)
147 IndirectType = Call->getParamInAllocaType(ArgIdx);
148 if (IsSRet)
149 IndirectType = Call->getParamStructRetType(ArgIdx);
150}
151
152/// Generate a libcall taking the given operands as arguments and returning a
153/// result of type RetVT.
// Builds the argument list (with optional per-operand type overrides and
// pre-softening types), creates the external-symbol callee, configures the
// CallLoweringInfo, and lowers the call.  Returns the {result, chain} pair.
// NOTE(review): several lines were dropped by the extraction (e.g. original
// 156, 165, 184, 195, 201, 210-211) -- the Ops parameter, the Args
// declaration, parts of two conditions, and the CLI declaration are missing.
154std::pair<SDValue, SDValue>
155TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
157 MakeLibCallOptions CallOptions, const SDLoc &dl,
158 SDValue InChain) const {
159 if (LibcallImpl == RTLIB::Unsupported)
160 reportFatalInternalError("unsupported library call operation");
161
// Default to the DAG entry node when the caller supplied no chain.
162 if (!InChain)
163 InChain = DAG.getEntryNode();
164
166 Args.reserve(Ops.size());
167
168 ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
169 for (unsigned i = 0; i < Ops.size(); ++i) {
170 SDValue NewOp = Ops[i];
// Use the override type when provided, otherwise derive from the operand VT.
171 Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
172 ? OpsTypeOverrides[i]
173 : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
174 TargetLowering::ArgListEntry Entry(NewOp, Ty);
// When softening FP, remember the pre-softening argument type.
175 if (CallOptions.IsSoften)
176 Entry.OrigTy =
177 CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());
178
179 Entry.IsSExt =
180 shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
181 Entry.IsZExt = !Entry.IsSExt;
182
183 if (CallOptions.IsSoften &&
185 Entry.IsSExt = Entry.IsZExt = false;
186 }
187 Args.push_back(Entry);
188 }
189
// The callee is the external symbol for the chosen libcall implementation.
190 SDValue Callee =
191 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
192
193 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
194 Type *OrigRetTy = RetTy;
196 bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
197 bool zeroExtend = !signExtend;
198
199 if (CallOptions.IsSoften) {
200 OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
202 signExtend = zeroExtend = false;
203 }
204
205 CLI.setDebugLoc(dl)
206 .setChain(InChain)
207 .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
208 Callee, std::move(Args))
209 .setNoReturn(CallOptions.DoesNotReturn)
212 .setSExtResult(signExtend)
213 .setZExtResult(zeroExtend);
214 return LowerCallTo(CLI);
215}
216
// Choose a sequence of value types (appended to MemOps) whose combined store
// sizes cover Op.size() for a memcpy/memset-style operation, respecting the
// destination alignment and the Limit on the number of operations.  Returns
// false when no valid sequence within Limit exists.
// NOTE(review): the leading signature line and several body lines are
// missing from this extraction (e.g. original 217, 235, 262-263, 287-289).
218 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
219 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
220 const AttributeList &FuncAttributes, EVT *LargestVT) const {
// Bail out early for memcpy with a fixed destination alignment that exceeds
// the source alignment, unless the caller asked for an unlimited expansion.
221 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
222 Op.getSrcAlign() < Op.getDstAlign())
223 return false;
224
225 EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
226
// MVT::Other means the target expressed no preference -- pick one ourselves.
227 if (VT == MVT::Other) {
228 // Use the largest integer type whose alignment constraints are satisfied.
229 // We only need to check DstAlign here as SrcAlign is always greater or
230 // equal to DstAlign (or zero).
231 VT = MVT::LAST_INTEGER_VALUETYPE;
232 if (Op.isFixedDstAlign())
233 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
234 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
236 assert(VT.isInteger());
237
238 // Find the largest legal integer type.
239 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
240 while (!isTypeLegal(LVT))
241 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
242 assert(LVT.isInteger());
243
244 // If the type we've chosen is larger than the largest legal integer type
245 // then use that instead.
246 if (VT.bitsGT(LVT))
247 VT = LVT;
248 }
249
250 unsigned NumMemOps = 0;
251 uint64_t Size = Op.size();
252 while (Size) {
253 unsigned VTSize = VT.getSizeInBits() / 8;
// Shrink VT until it fits in the remaining size (or we decide to overlap).
254 while (VTSize > Size) {
255 // For now, only use non-vector load / store's for the left-over pieces.
256 EVT NewVT = VT;
257 unsigned NewVTSize;
258
259 bool Found = false;
260 if (VT.isVector() || VT.isFloatingPoint()) {
261 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
264 Found = true;
265 else if (NewVT == MVT::i64 &&
267 isSafeMemOpType(MVT::f64)) {
268 // i64 is usually not legal on 32-bit targets, but f64 may be.
269 NewVT = MVT::f64;
270 Found = true;
271 }
272 }
273
// Step down through the integer types until a safe one is found
// (stopping at i8, which is always the floor).
274 if (!Found) {
275 do {
276 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
277 if (NewVT == MVT::i8)
278 break;
279 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
280 }
281 NewVTSize = NewVT.getSizeInBits() / 8;
282
283 // If the new VT cannot cover all of the remaining bits, then consider
284 // issuing a (or a pair of) unaligned and overlapping load / store.
285 unsigned Fast;
286 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
288 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
290 Fast)
291 VTSize = Size;
292 else {
293 VT = NewVT;
294 VTSize = NewVTSize;
295 }
296 }
297
298 if (++NumMemOps > Limit)
299 return false;
300
301 MemOps.push_back(VT);
302 Size -= VTSize;
303 }
304
305 return true;
306}
307
308/// Soften the operands of a comparison. This code is shared among BR_CC,
309/// SELECT_CC, and SETCC handlers.
// Convenience overload: forwards to the chained variant with an empty Chain.
// NOTE(review): the signature's first line is missing from this extraction
// (original line 310 dropped).
311 SDValue &NewLHS, SDValue &NewRHS,
312 ISD::CondCode &CCCode,
313 const SDLoc &dl, const SDValue OldLHS,
314 const SDValue OldRHS) const {
315 SDValue Chain;
316 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
317 OldRHS, Chain);
318}
319
// Chained variant: expand an FP comparison on a soft-float type into one or
// two integer comparison libcalls (two when the predicate needs an ordered
// check combined with an equality check, e.g. SETONE/SETUEQ), rewriting
// NewLHS/NewRHS/CCCode to an integer setcc against the libcall result.
// NOTE(review): the signature's first line and a few body lines are missing
// from this extraction (e.g. original 320, 423, 425, 435, 451, 460).
321 SDValue &NewLHS, SDValue &NewRHS,
322 ISD::CondCode &CCCode,
323 const SDLoc &dl, const SDValue OldLHS,
324 const SDValue OldRHS,
325 SDValue &Chain,
326 bool IsSignaling) const {
327 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
328 // not supporting it. We can update this code when libgcc provides such
329 // functions.
330
331 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
332 && "Unsupported setcc type!");
333
334 // Expand into one or more soft-fp libcall(s).
335 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
336 bool ShouldInvertCC = false;
// Map the condition code (and the FP type) to the comparison libcall(s).
337 switch (CCCode) {
338 case ISD::SETEQ:
339 case ISD::SETOEQ:
340 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
341 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
342 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
343 break;
344 case ISD::SETNE:
345 case ISD::SETUNE:
346 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
347 (VT == MVT::f64) ? RTLIB::UNE_F64 :
348 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
349 break;
350 case ISD::SETGE:
351 case ISD::SETOGE:
352 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
353 (VT == MVT::f64) ? RTLIB::OGE_F64 :
354 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
355 break;
356 case ISD::SETLT:
357 case ISD::SETOLT:
358 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
359 (VT == MVT::f64) ? RTLIB::OLT_F64 :
360 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
361 break;
362 case ISD::SETLE:
363 case ISD::SETOLE:
364 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
365 (VT == MVT::f64) ? RTLIB::OLE_F64 :
366 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
367 break;
368 case ISD::SETGT:
369 case ISD::SETOGT:
370 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
371 (VT == MVT::f64) ? RTLIB::OGT_F64 :
372 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
373 break;
// SETO is the inverse of SETUO: run the unordered-test libcall and invert.
374 case ISD::SETO:
375 ShouldInvertCC = true;
376 [[fallthrough]];
377 case ISD::SETUO:
378 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
379 (VT == MVT::f64) ? RTLIB::UO_F64 :
380 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
381 break;
382 case ISD::SETONE:
383 // SETONE = O && UNE
384 ShouldInvertCC = true;
385 [[fallthrough]];
// SETUEQ (and inverted SETONE) needs two calls: unordered test + equality.
386 case ISD::SETUEQ:
387 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
388 (VT == MVT::f64) ? RTLIB::UO_F64 :
389 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
390 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
391 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
392 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
393 break;
394 default:
395 // Invert CC for unordered comparisons
396 ShouldInvertCC = true;
397 switch (CCCode) {
398 case ISD::SETULT:
399 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
400 (VT == MVT::f64) ? RTLIB::OGE_F64 :
401 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
402 break;
403 case ISD::SETULE:
404 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
405 (VT == MVT::f64) ? RTLIB::OGT_F64 :
406 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
407 break;
408 case ISD::SETUGT:
409 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
410 (VT == MVT::f64) ? RTLIB::OLE_F64 :
411 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
412 break;
413 case ISD::SETUGE:
414 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
415 (VT == MVT::f64) ? RTLIB::OLT_F64 :
416 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
417 break;
418 default: llvm_unreachable("Do not know how to soften this setcc!");
419 }
420 }
421
422 // Use the target specific return value for comparison lib calls.
424 SDValue Ops[2] = {NewLHS, NewRHS};
426 EVT OpsVT[2] = { OldLHS.getValueType(),
427 OldRHS.getValueType() };
428 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
// Issue the first (or only) comparison libcall; compare its result to 0.
429 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
430 NewLHS = Call.first;
431 NewRHS = DAG.getConstant(0, dl, RetVT);
432
433 RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
434 if (LC1Impl == RTLIB::Unsupported) {
436 "no libcall available to soften floating-point compare");
437 }
438
// The libcall implementation dictates the integer predicate to use.
439 CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
440 if (ShouldInvertCC) {
441 assert(RetVT.isInteger());
442 CCCode = getSetCCInverse(CCCode, RetVT);
443 }
444
445 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
446 // Update Chain.
447 Chain = Call.second;
448 } else {
449 RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
450 if (LC2Impl == RTLIB::Unsupported) {
452 "no libcall available to soften floating-point compare");
453 }
454
455 assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
456 "unordered call should be simple boolean");
457
458 EVT SetCCVT =
459 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
461 NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
462 DAG.getValueType(MVT::i1));
463 }
464
// Combine the two comparison results: AND for the inverted (ordered) form,
// OR otherwise, token-factoring the two call chains together.
465 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
466 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
467 CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
468 if (ShouldInvertCC)
469 CCCode = getSetCCInverse(CCCode, RetVT);
470 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
471 if (Chain)
472 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
473 Call2.second);
474 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
475 Tmp.getValueType(), Tmp, NewLHS);
476 NewRHS = SDValue();
477 }
478}
479
480/// Return the entry encoding for a jump table in the current function. The
481/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
// NOTE(review): the signature and both return statements were dropped by the
// extraction (original lines 482, 484-485, 488); only the comments and the
// closing brace remain visible.
483 // In non-pic modes, just use the address of a block.
486
487 // Otherwise, use a label difference.
489}
490
// Default PIC jump-table relocation base: the table value itself, unchanged.
// NOTE(review): the signature's first line is missing from this extraction
// (original line 491 dropped); the Table parameter is declared there.
492 SelectionDAG &DAG) const {
493 return Table;
494}
495
496/// This returns the relocation base for the given PIC jumptable, the same as
497/// getPICJumpTableRelocBase, but as an MCExpr.
// NOTE(review): the line naming the function and its first parameters is
// missing from this extraction (original line 499 dropped).
498const MCExpr *
500 unsigned JTI,MCContext &Ctx) const{
501 // The normal PIC reloc base is the label at the start of the jump table.
502 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
503}
504
// Build the indirect branch (ISD::BRIND) for a jump table, optionally
// threading jump-table debug info into the chain first.
// NOTE(review): the signature's first line and the `if` condition guarding
// the debug-info node are missing (original lines 505, 510 dropped).
506 SDValue Addr, int JTI,
507 SelectionDAG &DAG) const {
508 SDValue Chain = Value;
509 // Jump table debug info is only needed if CodeView is enabled.
511 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
512 }
513 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
514}
515
// A global-address offset may be folded only when the global is known
// DSO-local and the code is not position independent.
// NOTE(review): the signature line naming the function and the GA parameter
// is missing from this extraction (original line 517 dropped), as is the
// PIC-mode condition (original line 527).
516bool
518 const TargetMachine &TM = getTargetMachine();
519 const GlobalValue *GV = GA->getGlobal();
520
521 // If the address is not even local to this DSO we will have to load it from
522 // a got and then add the offset.
523 if (!TM.shouldAssumeDSOLocal(GV))
524 return false;
525
526 // If the code is position independent we will have to add a base register.
528 return false;
529
530 // Otherwise we can do it.
531 return true;
532}
533
534//===----------------------------------------------------------------------===//
535// Optimization Methods
536//===----------------------------------------------------------------------===//
537
538/// If the specified instruction has a constant integer operand and there are
539/// bits set in that constant that are not demanded, then clear those bits and
540/// return true.
// NOTE(review): the signature's first line is missing from this extraction
// (original line 541 dropped).
542 const APInt &DemandedBits,
543 const APInt &DemandedElts,
544 TargetLoweringOpt &TLO) const {
545 SDLoc DL(Op);
546 unsigned Opcode = Op.getOpcode();
547
548 // Early-out if we've ended up calling an undemanded node, leave this to
549 // constant folding.
550 if (DemandedBits.isZero() || DemandedElts.isZero())
551 return false;
552
553 // Do target-specific constant optimization.
554 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
555 return TLO.New.getNode();
556
557 // FIXME: ISD::SELECT, ISD::SELECT_CC
558 switch (Opcode) {
559 default:
560 break;
561 case ISD::XOR:
562 case ISD::AND:
563 case ISD::OR: {
// Only handle a non-opaque constant RHS.
564 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
565 if (!Op1C || Op1C->isOpaque())
566 return false;
567
568 // If this is a 'not' op, don't touch it because that's a canonical form.
569 const APInt &C = Op1C->getAPIntValue();
570 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
571 return false;
572
// Constant has bits outside the demanded set: rebuild the node with the
// constant masked down to the demanded bits.
573 if (!C.isSubsetOf(DemandedBits)) {
574 EVT VT = Op.getValueType();
575 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
576 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
577 Op->getFlags());
578 return TLO.CombineTo(Op, NewOp);
579 }
580
581 break;
582 }
583 }
584
585 return false;
586}
587
// Overload without an explicit element mask: demand all elements of a vector
// (or a single lane for scalars) and forward to the full variant.
// NOTE(review): the signature's first line and the vector arm of the ternary
// are missing from this extraction (original lines 588, 593 dropped).
589 const APInt &DemandedBits,
590 TargetLoweringOpt &TLO) const {
591 EVT VT = Op.getValueType();
592 APInt DemandedElts = VT.isVector()
594 : APInt(1, 1);
595 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
596}
597
598/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
599/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
600/// but it could be generalized for targets with other types of implicit
601/// widening casts.
// NOTE(review): the signature's first line is missing from this extraction
// (original line 602 dropped); BitWidth is a parameter declared there.
603 const APInt &DemandedBits,
604 TargetLoweringOpt &TLO) const {
605 assert(Op.getNumOperands() == 2 &&
606 "ShrinkDemandedOp only supports binary operators!");
607 assert(Op.getNode()->getNumValues() == 1 &&
608 "ShrinkDemandedOp only supports nodes with one result!");
609
610 EVT VT = Op.getValueType();
611 SelectionDAG &DAG = TLO.DAG;
612 SDLoc dl(Op);
613
614 // Early return, as this function cannot handle vector types.
615 if (VT.isVector())
616 return false;
617
618 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
619 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
620 "ShrinkDemandedOp only supports operands that have the same size!");
621
622 // Don't do this if the node has another user, which may require the
623 // full value.
624 if (!Op.getNode()->hasOneUse())
625 return false;
626
627 // Search for the smallest integer type with free casts to and from
628 // Op's type. For expedience, just check power-of-2 integer types.
629 unsigned DemandedSize = DemandedBits.getActiveBits();
630 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
631 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
632 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
633 if (isTruncateFree(Op, SmallVT) && isZExtFree(SmallVT, VT)) {
634 // We found a type with free casts.
635
636 // If the operation has the 'disjoint' flag, then the
637 // operands on the new node are also disjoint.
// NOTE(review): the else-arm of this flags initializer is missing
// (original line 639 dropped).
638 SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
640 unsigned Opcode = Op.getOpcode();
641 if (Opcode == ISD::PTRADD) {
642 // It isn't a ptradd anymore if it doesn't operate on the entire
643 // pointer.
644 Opcode = ISD::ADD;
645 }
// Truncate both operands, perform the op narrow, then any-extend back.
646 SDValue X = DAG.getNode(
647 Opcode, dl, SmallVT,
648 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
649 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
650 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
651 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
652 return TLO.CombineTo(Op, Z);
653 }
654 }
655 return false;
656}
657
// DAGCombiner-facing wrapper: run SimplifyDemandedBits with a TLO built from
// the combiner state, re-queueing the node on success.
// NOTE(review): the signature's first line and the body line inside the `if`
// are missing from this extraction (original lines 658, 668 dropped).
659 DAGCombinerInfo &DCI) const {
660 SelectionDAG &DAG = DCI.DAG;
661 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
662 !DCI.isBeforeLegalizeOps());
663 KnownBits Known;
664
665 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
666 if (Simplified) {
667 DCI.AddToWorklist(Op.getNode());
669 }
670 return Simplified;
671}
672
// DAGCombiner-facing wrapper taking an explicit element mask as well.
// NOTE(review): the signature's first line and the body line inside the `if`
// are missing from this extraction (original lines 673, 685 dropped).
674 const APInt &DemandedElts,
675 DAGCombinerInfo &DCI) const {
676 SelectionDAG &DAG = DCI.DAG;
677 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
678 !DCI.isBeforeLegalizeOps());
679 KnownBits Known;
680
681 bool Simplified =
682 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
683 if (Simplified) {
684 DCI.AddToWorklist(Op.getNode());
686 }
687 return Simplified;
688}
689
// TLO-based wrapper: derive the demanded-elements mask from the value type
// (all lanes for fixed vectors, a single broadcast bit otherwise) and
// forward to the full implementation.
// NOTE(review): the signature's first line and the vector arm of the ternary
// are missing from this extraction (original lines 690, 692, 701 dropped).
691 KnownBits &Known,
693 unsigned Depth,
694 bool AssumeSingleUse) const {
695 EVT VT = Op.getValueType();
696
697 // Since the number of lanes in a scalable vector is unknown at compile time,
698 // we track one bit which is implicitly broadcast to all lanes. This means
699 // that all lanes in a scalable vector are considered demanded.
700 APInt DemandedElts = VT.isFixedLengthVector()
702 : APInt(1, 1);
703 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
704 AssumeSingleUse);
705}
706
707// TODO: Under what circumstances can we create nodes? Constant folding?
// Given demanded bits/elements, try to return an existing value that is
// equivalent to Op for those demanded parts (without creating new nodes in
// most cases), so multi-use operands can be simplified.  Returns an empty
// SDValue when no such value is found.
// NOTE(review): the signature's first line and several body lines are missing
// from this extraction (numbering gaps, e.g. original 708, 714, 743, 761,
// 779, 880-882, 888, 893, 899, 907-909, 925, 938, 953, 983).
709 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
710 SelectionDAG &DAG, unsigned Depth) const {
711 EVT VT = Op.getValueType();
712
713 // Limit search depth.
715 return SDValue();
716
717 // Ignore UNDEFs.
718 if (Op.isUndef())
719 return SDValue();
720
721 // Not demanding any bits/elts from Op.
722 if (DemandedBits == 0 || DemandedElts == 0)
723 return DAG.getUNDEF(VT);
724
725 bool IsLE = DAG.getDataLayout().isLittleEndian();
726 unsigned NumElts = DemandedElts.getBitWidth();
727 unsigned BitWidth = DemandedBits.getBitWidth();
728 KnownBits LHSKnown, RHSKnown;
729 switch (Op.getOpcode()) {
730 case ISD::BITCAST: {
731 if (VT.isScalableVector())
732 return SDValue();
733
734 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
735 EVT SrcVT = Src.getValueType();
736 EVT DstVT = Op.getValueType();
737 if (SrcVT == DstVT)
738 return Src;
739
740 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
741 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
// Same element width: recurse through the bitcast with unchanged masks.
742 if (NumSrcEltBits == NumDstEltBits)
744 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
745 return DAG.getBitcast(DstVT, V);
746
// Wider destination elements: translate the masks onto the narrower
// source elements before recursing.
747 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
748 unsigned Scale = NumDstEltBits / NumSrcEltBits;
749 unsigned NumSrcElts = SrcVT.getVectorNumElements();
750 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
751 for (unsigned i = 0; i != Scale; ++i) {
752 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
753 unsigned BitOffset = EltOffset * NumSrcEltBits;
754 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
755 }
756 // Recursive calls below may turn not demanded elements into poison, so we
757 // need to demand all smaller source elements that maps to a demanded
758 // destination element.
759 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
760
762 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
763 return DAG.getBitcast(DstVT, V);
764 }
765
766 // TODO - bigendian once we have test coverage.
767 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
768 unsigned Scale = NumSrcEltBits / NumDstEltBits;
769 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
770 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
771 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
772 for (unsigned i = 0; i != NumElts; ++i)
773 if (DemandedElts[i]) {
774 unsigned Offset = (i % Scale) * NumDstEltBits;
775 DemandedSrcBits.insertBits(DemandedBits, Offset);
776 DemandedSrcElts.setBit(i / Scale);
777 }
778
780 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
781 return DAG.getBitcast(DstVT, V);
782 }
783
784 break;
785 }
786 case ISD::AND: {
787 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
788 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
789
790 // If all of the demanded bits are known 1 on one side, return the other.
791 // These bits cannot contribute to the result of the 'and' in this
792 // context.
793 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
794 return Op.getOperand(0);
795 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
796 return Op.getOperand(1);
797 break;
798 }
799 case ISD::OR: {
800 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
801 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
802
803 // If all of the demanded bits are known zero on one side, return the
804 // other. These bits cannot contribute to the result of the 'or' in this
805 // context.
806 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
807 return Op.getOperand(0);
808 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
809 return Op.getOperand(1);
810 break;
811 }
812 case ISD::XOR: {
813 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
814 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
815
816 // If all of the demanded bits are known zero on one side, return the
817 // other.
818 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
819 return Op.getOperand(0);
820 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
821 return Op.getOperand(1);
822 break;
823 }
824 case ISD::ADD:
825 case ISD::MUL:
826 case ISD::SMIN:
827 case ISD::SMAX:
828 case ISD::UMIN:
829 case ISD::UMAX: {
// If one operand is the op's identity element, the other operand is the
// result.
830 if (DAG.isIdentityElement(Op.getOpcode(), Op->getFlags(), Op.getOperand(1),
831 DemandedElts, 1, Depth + 1))
832 return Op.getOperand(0);
833
834 if (DAG.isIdentityElement(Op.getOpcode(), Op->getFlags(), Op.getOperand(0),
835 DemandedElts, 0, Depth + 1))
836 return Op.getOperand(1);
837 break;
838 }
839 case ISD::SHL: {
840 // If we are only demanding sign bits then we can use the shift source
841 // directly.
842 if (std::optional<unsigned> MaxSA =
843 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
844 SDValue Op0 = Op.getOperand(0);
845 unsigned ShAmt = *MaxSA;
846 unsigned NumSignBits =
847 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
848 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
849 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
850 return Op0;
851 }
852 break;
853 }
854 case ISD::SRL: {
855 // If we are only demanding sign bits then we can use the shift source
856 // directly.
857 if (std::optional<unsigned> MaxSA =
858 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
859 SDValue Op0 = Op.getOperand(0);
860 unsigned ShAmt = *MaxSA;
861 // Must already be signbits in DemandedBits bounds, and can't demand any
862 // shifted in zeroes.
863 if (DemandedBits.countl_zero() >= ShAmt) {
864 unsigned NumSignBits =
865 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
866 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
867 return Op0;
868 }
869 }
870 break;
871 }
872 case ISD::SETCC: {
873 SDValue Op0 = Op.getOperand(0);
874 SDValue Op1 = Op.getOperand(1);
875 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
876 // If (1) we only need the sign-bit, (2) the setcc operands are the same
877 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
878 // -1, we may be able to bypass the setcc.
879 if (DemandedBits.isSignMask() &&
883 // If we're testing X < 0, then this compare isn't needed - just use X!
884 // FIXME: We're limiting to integer types here, but this should also work
885 // if we don't care about FP signed-zero. The use of SETLT with FP means
886 // that we don't care about NaNs.
887 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
889 return Op0;
890 }
891 break;
892 }
// SIGN_EXTEND_INREG case -- the `case` label line itself is missing
// (original line 893 dropped).
894 // If none of the extended bits are demanded, eliminate the sextinreg.
895 SDValue Op0 = Op.getOperand(0);
896 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
897 unsigned ExBits = ExVT.getScalarSizeInBits();
898 if (DemandedBits.getActiveBits() <= ExBits &&
900 return Op0;
901 // If the input is already sign extended, just drop the extension.
902 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
903 if (NumSignBits >= (BitWidth - ExBits + 1))
904 return Op0;
905 break;
906 }
// Extend-vector-inreg cases -- the `case` label lines are missing
// (original lines 907-909 dropped).
910 if (VT.isScalableVector())
911 return SDValue();
912
913 // If we only want the lowest element and none of extended bits, then we can
914 // return the bitcasted source vector.
915 SDValue Src = Op.getOperand(0);
916 EVT SrcVT = Src.getValueType();
917 EVT DstVT = Op.getValueType();
918 if (IsLE && DemandedElts == 1 &&
919 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
920 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
921 return DAG.getBitcast(DstVT, Src);
922 }
923 break;
924 }
// INSERT_VECTOR_ELT case -- the `case` label line is missing (original
// line 925 dropped).
926 if (VT.isScalableVector())
927 return SDValue();
928
929 // If we don't demand the inserted element, return the base vector.
930 SDValue Vec = Op.getOperand(0);
931 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
932 EVT VecVT = Vec.getValueType();
933 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
934 !DemandedElts[CIdx->getZExtValue()])
935 return Vec;
936 break;
937 }
// INSERT_SUBVECTOR case -- the `case` label line is missing (original
// line 938 dropped).
939 if (VT.isScalableVector())
940 return SDValue();
941
942 SDValue Vec = Op.getOperand(0);
943 SDValue Sub = Op.getOperand(1);
944 uint64_t Idx = Op.getConstantOperandVal(2);
945 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
946 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
947 // If we don't demand the inserted subvector, return the base vector.
948 if (DemandedSubElts == 0)
949 return Vec;
950 break;
951 }
952 case ISD::VECTOR_SHUFFLE: {
954 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
955
956 // If all the demanded elts are from one operand and are inline,
957 // then we can use the operand directly.
958 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
959 for (unsigned i = 0; i != NumElts; ++i) {
960 int M = ShuffleMask[i];
961 if (M < 0 || !DemandedElts[i])
962 continue;
963 AllUndef = false;
964 IdentityLHS &= (M == (int)i);
965 IdentityRHS &= ((M - NumElts) == i);
966 }
967
968 if (AllUndef)
969 return DAG.getUNDEF(Op.getValueType());
970 if (IdentityLHS)
971 return Op.getOperand(0);
972 if (IdentityRHS)
973 return Op.getOperand(1);
974 break;
975 }
976 default:
977 // TODO: Probably okay to remove after audit; here to reduce change size
978 // in initial enablement patch for scalable vectors
979 if (VT.isScalableVector())
980 return SDValue();
981
// Give targets a chance to handle their own nodes.
982 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
984 Op, DemandedBits, DemandedElts, DAG, Depth))
985 return V;
986 break;
987 }
988 return SDValue();
989}
990
// Bits-only wrapper: demand all elements (fixed vectors) or the single
// broadcast lane (scalars/scalable) and forward to the full variant.
// NOTE(review): the signature's first lines and the vector arm of the
// ternary are missing (original lines 991-992, 999 dropped).
993 unsigned Depth) const {
994 EVT VT = Op.getValueType();
995 // Since the number of lanes in a scalable vector is unknown at compile time,
996 // we track one bit which is implicitly broadcast to all lanes. This means
997 // that all lanes in a scalable vector are considered demanded.
998 APInt DemandedElts = VT.isFixedLengthVector()
1000 : APInt(1, 1);
1001 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1002 Depth);
1003}
1004
1006 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1007 unsigned Depth) const {
// Convenience overload: demand every bit of Op's scalar value and forward
// to the main implementation with the caller-supplied element mask.
1008 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
1009 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1010 Depth);
1011}
1012
1013// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
1014// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
// The shift amount must be exactly 1; the operands must have enough known
// sign/zero bits that the addition cannot overflow in the narrower type
// chosen below (or the adds must carry nuw/nsw flags).
1017 const TargetLowering &TLI,
1018 const APInt &DemandedBits,
1019 const APInt &DemandedElts, unsigned Depth) {
1020 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1021 "SRL or SRA node is required here!");
1022 // Is the right shift using an immediate value of 1?
1023 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
1024 if (!N1C || !N1C->isOne())
1025 return SDValue();
1026
1027 // We are looking for an avgfloor
1028 // add(ext, ext)
1029 // or one of these as a avgceil
1030 // add(add(ext, ext), 1)
1031 // add(add(ext, 1), ext)
1032 // add(ext, add(ext, 1))
1033 SDValue Add = Op.getOperand(0);
1034 if (Add.getOpcode() != ISD::ADD)
1035 return SDValue();
1036
1037 SDValue ExtOpA = Add.getOperand(0);
1038 SDValue ExtOpB = Add.getOperand(1);
1039 SDValue Add2;
// Helper that recognizes an inner add-of-1 in either operand position.
// On success it rebinds ExtOpA/ExtOpB to the two averaged operands and
// records the inner add in Add2 (consulted later for overflow checks).
1040 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1041 ConstantSDNode *ConstOp;
1042 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
1043 ConstOp->isOne()) {
1044 ExtOpA = Op1;
1045 ExtOpB = Op3;
1046 Add2 = A;
1047 return true;
1048 }
1049 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
1050 ConstOp->isOne()) {
1051 ExtOpA = Op1;
1052 ExtOpB = Op2;
1053 Add2 = A;
1054 return true;
1055 }
1056 return false;
1057 };
// IsCeil is true iff a "+1" rounding term was matched, i.e. the pattern is
// an avgceil rather than an avgfloor.
1058 bool IsCeil =
1059 (ExtOpA.getOpcode() == ISD::ADD &&
1060 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1061 (ExtOpB.getOpcode() == ISD::ADD &&
1062 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1063
1064 // If the shift is signed (sra):
1065 // - Needs >= 2 sign bit for both operands.
1066 // - Needs >= 2 zero bits.
1067 // If the shift is unsigned (srl):
1068 // - Needs >= 1 zero bit for both operands.
1069 // - Needs 1 demanded bit zero and >= 2 sign bits.
1070 SelectionDAG &DAG = TLO.DAG;
1071 unsigned ShiftOpc = Op.getOpcode();
1072 bool IsSigned = false;
// KnownBits counts how many high bits of both add operands are known
// redundant (sign-extension or zero bits); it bounds the narrowest type
// the average can be performed in (see MinWidth below).
1073 unsigned KnownBits;
1074 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1075 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
// Subtract one so NumSigned counts redundant sign bits beyond the sign bit
// itself, making it comparable with the leading-zero count below.
1076 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1077 unsigned NumZeroA =
1078 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1079 unsigned NumZeroB =
1080 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1081 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1082
// Decide signedness: prefer the unsigned form when the leading-zero count
// beats the sign-bit count, since it permits a narrower type.
1083 switch (ShiftOpc) {
1084 default:
1085 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1086 case ISD::SRA: {
1087 if (NumZero >= 2 && NumSigned < NumZero) {
1088 IsSigned = false;
1089 KnownBits = NumZero;
1090 break;
1091 }
1092 if (NumSigned >= 1) {
1093 IsSigned = true;
1094 KnownBits = NumSigned;
1095 break;
1096 }
1097 return SDValue();
1098 }
1099 case ISD::SRL: {
1100 if (NumZero >= 1 && NumSigned < NumZero) {
1101 IsSigned = false;
1102 KnownBits = NumZero;
1103 break;
1104 }
// A srl can still become a signed average when the demanded bits show the
// (sign) bit shifted into the top is never observed.
1105 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1106 IsSigned = true;
1107 KnownBits = NumSigned;
1108 break;
1109 }
1110 return SDValue();
1111 }
1112 }
1113
// Pick the ISD average opcode from the rounding mode and signedness.
1114 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1115 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1116
1117 // Find the smallest power-2 type that is legal for this vector size and
1118 // operation, given the original type size and the number of known sign/zero
1119 // bits.
1120 EVT VT = Op.getValueType();
1121 unsigned MinWidth =
1122 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1123 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
// NOTE(review): the guard condition for this early return (original line
// 1124) is not visible in this excerpt.
1125 return SDValue();
1126 if (VT.isVector())
1127 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1128 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1129 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1130 // larger type size to do the transform.
1131 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1132 return SDValue();
// With no-wrap adds the shr(add(...)) computation never overflowed, so the
// average is exact even in the original (wider) type.
1133 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1134 Add.getOperand(1)) &&
1135 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1136 Add2.getOperand(1))))
1137 NVT = VT;
1138 else
1139 return SDValue();
1140 }
1141
1142 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
1143 // this is likely to stop other folds (reassociation, value tracking etc.)
1144 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1145 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1146 return SDValue();
1147
// Build the average in the narrow type and extend/truncate the result back
// to the original type, matching the chosen signedness throughout.
1148 SDLoc DL(Op);
1149 SDValue ResultAVG =
1150 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1151 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1152 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1153}
1154
1155/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1156/// result of Op are ever used downstream. If we can use this information to
1157/// simplify Op, create a new simplified DAG node and return true, returning the
1158/// original and new nodes in Old and New. Otherwise, analyze the expression and
1159/// return a mask of Known bits for the expression (used to simplify the
1160/// caller). The Known bits may only be accurate for those bits in the
1161/// OriginalDemandedBits and OriginalDemandedElts.
1163 SDValue Op, const APInt &OriginalDemandedBits,
1164 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1165 unsigned Depth, bool AssumeSingleUse) const {
1166 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1167 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1168 "Mask size mismatches value type size!");
1169
1170 // Don't know anything.
1171 Known = KnownBits(BitWidth);
1172
1173 EVT VT = Op.getValueType();
1174 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1175 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1176 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1177 "Unexpected vector size");
1178
1179 APInt DemandedBits = OriginalDemandedBits;
1180 APInt DemandedElts = OriginalDemandedElts;
1181 SDLoc dl(Op);
1182
1183 // Undef operand.
1184 if (Op.isUndef())
1185 return false;
1186
1187 // We can't simplify target constants.
1188 if (Op.getOpcode() == ISD::TargetConstant)
1189 return false;
1190
1191 if (Op.getOpcode() == ISD::Constant) {
1192 // We know all of the bits for a constant!
1193 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1194 return false;
1195 }
1196
1197 if (Op.getOpcode() == ISD::ConstantFP) {
1198 // We know all of the bits for a floating point constant!
1200 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1201 return false;
1202 }
1203
1204 // Other users may use these bits.
1205 bool HasMultiUse = false;
1206 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1208 // Limit search depth.
1209 return false;
1210 }
1211 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1213 DemandedElts = APInt::getAllOnes(NumElts);
1214 HasMultiUse = true;
1215 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1216 // Not demanding any bits/elts from Op.
1217 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1218 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1219 // Limit search depth.
1220 return false;
1221 }
1222
1223 KnownBits Known2;
1224 switch (Op.getOpcode()) {
1225 case ISD::SCALAR_TO_VECTOR: {
1226 if (VT.isScalableVector())
1227 return false;
1228 if (!DemandedElts[0])
1229 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1230
1231 KnownBits SrcKnown;
1232 SDValue Src = Op.getOperand(0);
1233 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1234 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1235 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1236 return true;
1237
1238 // Upper elements are undef, so only get the knownbits if we just demand
1239 // the bottom element.
1240 if (DemandedElts == 1)
1241 Known = SrcKnown.anyextOrTrunc(BitWidth);
1242 break;
1243 }
1244 case ISD::BUILD_VECTOR:
1245 // Collect the known bits that are shared by every demanded element.
1246 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1247 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1248 return false; // Don't fall through, will infinitely loop.
1249 case ISD::SPLAT_VECTOR: {
1250 SDValue Scl = Op.getOperand(0);
1251 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1252 KnownBits KnownScl;
1253 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1254 return true;
1255
1256 // Implicitly truncate the bits to match the official semantics of
1257 // SPLAT_VECTOR.
1258 Known = KnownScl.trunc(BitWidth);
1259 break;
1260 }
1261 case ISD::LOAD: {
1262 auto *LD = cast<LoadSDNode>(Op);
1263 if (getTargetConstantFromLoad(LD)) {
1264 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1265 return false; // Don't fall through, will infinitely loop.
1266 }
1267 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1268 // If this is a ZEXTLoad and we are looking at the loaded value.
1269 EVT MemVT = LD->getMemoryVT();
1270 unsigned MemBits = MemVT.getScalarSizeInBits();
1271 Known.Zero.setBitsFrom(MemBits);
1272 return false; // Don't fall through, will infinitely loop.
1273 }
1274 break;
1275 }
1277 if (VT.isScalableVector())
1278 return false;
1279 SDValue Vec = Op.getOperand(0);
1280 SDValue Scl = Op.getOperand(1);
1281 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1282 EVT VecVT = Vec.getValueType();
1283
1284 // If index isn't constant, assume we need all vector elements AND the
1285 // inserted element.
1286 APInt DemandedVecElts(DemandedElts);
1287 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1288 unsigned Idx = CIdx->getZExtValue();
1289 DemandedVecElts.clearBit(Idx);
1290
1291 // Inserted element is not required.
1292 if (!DemandedElts[Idx])
1293 return TLO.CombineTo(Op, Vec);
1294 }
1295
1296 KnownBits KnownScl;
1297 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1298 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1299 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1300 return true;
1301
1302 Known = KnownScl.anyextOrTrunc(BitWidth);
1303
1304 KnownBits KnownVec;
1305 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1306 Depth + 1))
1307 return true;
1308
1309 if (!!DemandedVecElts)
1310 Known = Known.intersectWith(KnownVec);
1311
1312 return false;
1313 }
1314 case ISD::INSERT_SUBVECTOR: {
1315 if (VT.isScalableVector())
1316 return false;
1317 // Demand any elements from the subvector and the remainder from the src its
1318 // inserted into.
1319 SDValue Src = Op.getOperand(0);
1320 SDValue Sub = Op.getOperand(1);
1321 uint64_t Idx = Op.getConstantOperandVal(2);
1322 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1323 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1324 APInt DemandedSrcElts = DemandedElts;
1325 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
1326
1327 KnownBits KnownSub, KnownSrc;
1328 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1329 Depth + 1))
1330 return true;
1331 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1332 Depth + 1))
1333 return true;
1334
1335 Known.setAllConflict();
1336 if (!!DemandedSubElts)
1337 Known = Known.intersectWith(KnownSub);
1338 if (!!DemandedSrcElts)
1339 Known = Known.intersectWith(KnownSrc);
1340
1341 // Attempt to avoid multi-use src if we don't need anything from it.
1342 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1343 !DemandedSrcElts.isAllOnes()) {
1345 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1347 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1348 if (NewSub || NewSrc) {
1349 NewSub = NewSub ? NewSub : Sub;
1350 NewSrc = NewSrc ? NewSrc : Src;
1351 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1352 Op.getOperand(2));
1353 return TLO.CombineTo(Op, NewOp);
1354 }
1355 }
1356 break;
1357 }
1359 if (VT.isScalableVector())
1360 return false;
1361 // Offset the demanded elts by the subvector index.
1362 SDValue Src = Op.getOperand(0);
1363 if (Src.getValueType().isScalableVector())
1364 break;
1365 uint64_t Idx = Op.getConstantOperandVal(1);
1366 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1367 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1368
1369 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1370 Depth + 1))
1371 return true;
1372
1373 // Attempt to avoid multi-use src if we don't need anything from it.
1374 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1376 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1377 if (DemandedSrc) {
1378 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1379 Op.getOperand(1));
1380 return TLO.CombineTo(Op, NewOp);
1381 }
1382 }
1383 break;
1384 }
1385 case ISD::CONCAT_VECTORS: {
1386 if (VT.isScalableVector())
1387 return false;
1388 Known.setAllConflict();
1389 EVT SubVT = Op.getOperand(0).getValueType();
1390 unsigned NumSubVecs = Op.getNumOperands();
1391 unsigned NumSubElts = SubVT.getVectorNumElements();
1392 for (unsigned i = 0; i != NumSubVecs; ++i) {
1393 APInt DemandedSubElts =
1394 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1395 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1396 Known2, TLO, Depth + 1))
1397 return true;
1398 // Known bits are shared by every demanded subvector element.
1399 if (!!DemandedSubElts)
1400 Known = Known.intersectWith(Known2);
1401 }
1402 break;
1403 }
1404 case ISD::VECTOR_SHUFFLE: {
1405 assert(!VT.isScalableVector());
1406 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1407
1408 // Collect demanded elements from shuffle operands..
1409 APInt DemandedLHS, DemandedRHS;
1410 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1411 DemandedRHS))
1412 break;
1413
1414 if (!!DemandedLHS || !!DemandedRHS) {
1415 SDValue Op0 = Op.getOperand(0);
1416 SDValue Op1 = Op.getOperand(1);
1417
1418 Known.setAllConflict();
1419 if (!!DemandedLHS) {
1420 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1421 Depth + 1))
1422 return true;
1423 Known = Known.intersectWith(Known2);
1424 }
1425 if (!!DemandedRHS) {
1426 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1427 Depth + 1))
1428 return true;
1429 Known = Known.intersectWith(Known2);
1430 }
1431
1432 // Attempt to avoid multi-use ops if we don't need anything from them.
1434 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1436 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1437 if (DemandedOp0 || DemandedOp1) {
1438 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1439 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1440 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1441 return TLO.CombineTo(Op, NewOp);
1442 }
1443 }
1444 break;
1445 }
1446 case ISD::AND: {
1447 SDValue Op0 = Op.getOperand(0);
1448 SDValue Op1 = Op.getOperand(1);
1449
1450 // If the RHS is a constant, check to see if the LHS would be zero without
1451 // using the bits from the RHS. Below, we use knowledge about the RHS to
1452 // simplify the LHS, here we're using information from the LHS to simplify
1453 // the RHS.
1454 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1455 // Do not increment Depth here; that can cause an infinite loop.
1456 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1457 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1458 if ((LHSKnown.Zero & DemandedBits) ==
1459 (~RHSC->getAPIntValue() & DemandedBits))
1460 return TLO.CombineTo(Op, Op0);
1461
1462 // If any of the set bits in the RHS are known zero on the LHS, shrink
1463 // the constant.
1464 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1465 DemandedElts, TLO))
1466 return true;
1467
1468 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1469 // constant, but if this 'and' is only clearing bits that were just set by
1470 // the xor, then this 'and' can be eliminated by shrinking the mask of
1471 // the xor. For example, for a 32-bit X:
1472 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1473 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1474 LHSKnown.One == ~RHSC->getAPIntValue()) {
1475 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1476 return TLO.CombineTo(Op, Xor);
1477 }
1478 }
1479
1480 // (X +/- Y) & Y --> ~X & Y when Y is a power of 2 (or zero).
1481 SDValue X, Y;
1482 if (sd_match(Op,
1483 m_And(m_Value(Y),
1485 m_Sub(m_Value(X), m_Deferred(Y)))))) &&
1486 TLO.DAG.isKnownToBeAPowerOfTwo(Y, DemandedElts, /*OrZero=*/true)) {
1487 return TLO.CombineTo(
1488 Op, TLO.DAG.getNode(ISD::AND, dl, VT, TLO.DAG.getNOT(dl, X, VT), Y));
1489 }
1490
1491 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1492 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1493 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1494 (Op0.getOperand(0).isUndef() ||
1496 Op0->hasOneUse()) {
1497 unsigned NumSubElts =
1499 unsigned SubIdx = Op0.getConstantOperandVal(2);
1500 APInt DemandedSub =
1501 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1502 KnownBits KnownSubMask =
1503 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1504 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1505 SDValue NewAnd =
1506 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1507 SDValue NewInsert =
1508 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1509 Op0.getOperand(1), Op0.getOperand(2));
1510 return TLO.CombineTo(Op, NewInsert);
1511 }
1512 }
1513
1514 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1515 Depth + 1))
1516 return true;
1517 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1518 Known2, TLO, Depth + 1))
1519 return true;
1520
1521 // If all of the demanded bits are known one on one side, return the other.
1522 // These bits cannot contribute to the result of the 'and'.
1523 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1524 return TLO.CombineTo(Op, Op0);
1525 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1526 return TLO.CombineTo(Op, Op1);
1527 // If all of the demanded bits in the inputs are known zeros, return zero.
1528 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1529 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1530 // If the RHS is a constant, see if we can simplify it.
1531 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1532 TLO))
1533 return true;
1534 // If the operation can be done in a smaller type, do so.
1536 return true;
1537
1538 // Attempt to avoid multi-use ops if we don't need anything from them.
1539 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1541 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1543 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1544 if (DemandedOp0 || DemandedOp1) {
1545 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1546 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1547 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1548 return TLO.CombineTo(Op, NewOp);
1549 }
1550 }
1551
1552 Known &= Known2;
1553 break;
1554 }
1555 case ISD::OR: {
1556 SDValue Op0 = Op.getOperand(0);
1557 SDValue Op1 = Op.getOperand(1);
1558 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1559 Depth + 1)) {
1560 Op->dropFlags(SDNodeFlags::Disjoint);
1561 return true;
1562 }
1563
1564 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1565 Known2, TLO, Depth + 1)) {
1566 Op->dropFlags(SDNodeFlags::Disjoint);
1567 return true;
1568 }
1569
1570 // If all of the demanded bits are known zero on one side, return the other.
1571 // These bits cannot contribute to the result of the 'or'.
1572 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1573 return TLO.CombineTo(Op, Op0);
1574 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1575 return TLO.CombineTo(Op, Op1);
1576 // If the RHS is a constant, see if we can simplify it.
1577 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1578 return true;
1579 // If the operation can be done in a smaller type, do so.
1581 return true;
1582
1583 // Attempt to avoid multi-use ops if we don't need anything from them.
1584 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1586 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1588 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1589 if (DemandedOp0 || DemandedOp1) {
1590 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1591 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1592 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1593 return TLO.CombineTo(Op, NewOp);
1594 }
1595 }
1596
1597 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1598 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1599 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1600 Op0->hasOneUse() && Op1->hasOneUse()) {
1601 // Attempt to match all commutations - m_c_Or would've been useful!
1602 for (int I = 0; I != 2; ++I) {
1603 SDValue X = Op.getOperand(I).getOperand(0);
1604 SDValue C1 = Op.getOperand(I).getOperand(1);
1605 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1606 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1607 if (Alt.getOpcode() == ISD::OR) {
1608 for (int J = 0; J != 2; ++J) {
1609 if (X == Alt.getOperand(J)) {
1610 SDValue Y = Alt.getOperand(1 - J);
1611 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1612 {C1, C2})) {
1613 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1614 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1615 return TLO.CombineTo(
1616 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1617 }
1618 }
1619 }
1620 }
1621 }
1622 }
1623
1624 Known |= Known2;
1625 break;
1626 }
1627 case ISD::XOR: {
1628 SDValue Op0 = Op.getOperand(0);
1629 SDValue Op1 = Op.getOperand(1);
1630
1631 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1632 Depth + 1))
1633 return true;
1634 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1635 Depth + 1))
1636 return true;
1637
1638 // If all of the demanded bits are known zero on one side, return the other.
1639 // These bits cannot contribute to the result of the 'xor'.
1640 if (DemandedBits.isSubsetOf(Known.Zero))
1641 return TLO.CombineTo(Op, Op0);
1642 if (DemandedBits.isSubsetOf(Known2.Zero))
1643 return TLO.CombineTo(Op, Op1);
1644 // If the operation can be done in a smaller type, do so.
1646 return true;
1647
1648 // If all of the unknown bits are known to be zero on one side or the other
1649 // turn this into an *inclusive* or.
1650 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1651 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1652 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1653
1654 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1655 if (C) {
1656 // If one side is a constant, and all of the set bits in the constant are
1657 // also known set on the other side, turn this into an AND, as we know
1658 // the bits will be cleared.
1659 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1660 // NB: it is okay if more bits are known than are requested
1661 if (C->getAPIntValue() == Known2.One) {
1662 SDValue ANDC =
1663 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1664 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1665 }
1666
1667 // If the RHS is a constant, see if we can change it. Don't alter a -1
1668 // constant because that's a 'not' op, and that is better for combining
1669 // and codegen.
1670 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1671 // We're flipping all demanded bits. Flip the undemanded bits too.
1672 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1673 return TLO.CombineTo(Op, New);
1674 }
1675
1676 unsigned Op0Opcode = Op0.getOpcode();
1677 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1678 if (ConstantSDNode *ShiftC =
1679 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1680 // Don't crash on an oversized shift. We can not guarantee that a
1681 // bogus shift has been simplified to undef.
1682 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1683 uint64_t ShiftAmt = ShiftC->getZExtValue();
1685 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1686 : Ones.lshr(ShiftAmt);
1687 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1689 // If the xor constant is a demanded mask, do a 'not' before the
1690 // shift:
1691 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1692 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1693 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1694 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1695 Op0.getOperand(1)));
1696 }
1697 }
1698 }
1699 }
1700 }
1701
1702 // If we can't turn this into a 'not', try to shrink the constant.
1703 if (!C || !C->isAllOnes())
1704 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1705 return true;
1706
1707 // Attempt to avoid multi-use ops if we don't need anything from them.
1708 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1710 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1712 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1713 if (DemandedOp0 || DemandedOp1) {
1714 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1715 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1716 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1717 return TLO.CombineTo(Op, NewOp);
1718 }
1719 }
1720
1721 Known ^= Known2;
1722 break;
1723 }
1724 case ISD::SELECT:
1725 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1726 Known, TLO, Depth + 1))
1727 return true;
1728 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1729 Known2, TLO, Depth + 1))
1730 return true;
1731
1732 // If the operands are constants, see if we can simplify them.
1733 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1734 return true;
1735
1736 // Only known if known in both the LHS and RHS.
1737 Known = Known.intersectWith(Known2);
1738 break;
1739 case ISD::VSELECT:
1740 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1741 Known, TLO, Depth + 1))
1742 return true;
1743 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1744 Known2, TLO, Depth + 1))
1745 return true;
1746
1747 // Only known if known in both the LHS and RHS.
1748 Known = Known.intersectWith(Known2);
1749 break;
1750 case ISD::SELECT_CC:
1751 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1752 Known, TLO, Depth + 1))
1753 return true;
1754 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1755 Known2, TLO, Depth + 1))
1756 return true;
1757
1758 // If the operands are constants, see if we can simplify them.
1759 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1760 return true;
1761
1762 // Only known if known in both the LHS and RHS.
1763 Known = Known.intersectWith(Known2);
1764 break;
1765 case ISD::SETCC: {
1766 SDValue Op0 = Op.getOperand(0);
1767 SDValue Op1 = Op.getOperand(1);
1768 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1769 // If we're testing X < 0, X >= 0, X <= -1 or X > -1
1770 // (X is of integer type) then we only need the sign mask of the previous
1771 // result
1772 if (Op1.getValueType().isInteger() &&
1773 (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(Op1)) ||
1774 ((CC == ISD::SETLE || CC == ISD::SETGT) &&
1775 isAllOnesOrAllOnesSplat(Op1)))) {
1776 KnownBits KnownOp0;
1779 DemandedElts, KnownOp0, TLO, Depth + 1))
1780 return true;
1781 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1782 // width as the setcc result, and (3) the result of a setcc conforms to 0
1783 // or -1, we may be able to bypass the setcc.
1784 if (DemandedBits.isSignMask() &&
1788 // If we remove a >= 0 or > -1 (for integers), we need to introduce a
1789 // NOT Operation
1790 if (CC == ISD::SETGE || CC == ISD::SETGT) {
1791 SDLoc DL(Op);
1792 EVT VT = Op0.getValueType();
1793 SDValue NotOp0 = TLO.DAG.getNOT(DL, Op0, VT);
1794 return TLO.CombineTo(Op, NotOp0);
1795 }
1796 return TLO.CombineTo(Op, Op0);
1797 }
1798 }
1799 if (getBooleanContents(Op0.getValueType()) ==
1801 BitWidth > 1)
1802 Known.Zero.setBitsFrom(1);
1803 break;
1804 }
1805 case ISD::SHL: {
1806 SDValue Op0 = Op.getOperand(0);
1807 SDValue Op1 = Op.getOperand(1);
1808 EVT ShiftVT = Op1.getValueType();
1809
1810 if (std::optional<unsigned> KnownSA =
1811 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1812 unsigned ShAmt = *KnownSA;
1813 if (ShAmt == 0)
1814 return TLO.CombineTo(Op, Op0);
1815
1816 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1817 // single shift. We can do this if the bottom bits (which are shifted
1818 // out) are never demanded.
1819 // TODO - support non-uniform vector amounts.
1820 if (Op0.getOpcode() == ISD::SRL) {
1821 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1822 if (std::optional<unsigned> InnerSA =
1823 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1824 unsigned C1 = *InnerSA;
1825 unsigned Opc = ISD::SHL;
1826 int Diff = ShAmt - C1;
1827 if (Diff < 0) {
1828 Diff = -Diff;
1829 Opc = ISD::SRL;
1830 }
1831 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1832 return TLO.CombineTo(
1833 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1834 }
1835 }
1836 }
1837
1838 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1839 // are not demanded. This will likely allow the anyext to be folded away.
1840 // TODO - support non-uniform vector amounts.
1841 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1842 SDValue InnerOp = Op0.getOperand(0);
1843 EVT InnerVT = InnerOp.getValueType();
1844 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1845 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1846 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1847 SDValue NarrowShl = TLO.DAG.getNode(
1848 ISD::SHL, dl, InnerVT, InnerOp,
1849 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1850 return TLO.CombineTo(
1851 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1852 }
1853
1854 // Repeat the SHL optimization above in cases where an extension
1855 // intervenes: (shl (anyext (shr x, c1)), c2) to
1856 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1857 // aren't demanded (as above) and that the shifted upper c1 bits of
1858 // x aren't demanded.
1859 // TODO - support non-uniform vector amounts.
1860 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1861 InnerOp.hasOneUse()) {
1862 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1863 InnerOp, DemandedElts, Depth + 2)) {
1864 unsigned InnerShAmt = *SA2;
1865 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1866 DemandedBits.getActiveBits() <=
1867 (InnerBits - InnerShAmt + ShAmt) &&
1868 DemandedBits.countr_zero() >= ShAmt) {
1869 SDValue NewSA =
1870 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1871 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1872 InnerOp.getOperand(0));
1873 return TLO.CombineTo(
1874 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1875 }
1876 }
1877 }
1878 }
1879
1880 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1881 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1882 Depth + 1)) {
1883 // Disable the nsw and nuw flags. We can no longer guarantee that we
1884 // won't wrap after simplification.
1885 Op->dropFlags(SDNodeFlags::NoWrap);
1886 return true;
1887 }
1888 Known <<= ShAmt;
1889 // low bits known zero.
1890 Known.Zero.setLowBits(ShAmt);
1891
1892 // Attempt to avoid multi-use ops if we don't need anything from them.
1893 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1895 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1896 if (DemandedOp0) {
1897 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1898 return TLO.CombineTo(Op, NewOp);
1899 }
1900 }
1901
1902 // TODO: Can we merge this fold with the one below?
1903 // Try shrinking the operation as long as the shift amount will still be
1904 // in range.
1905 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1906 Op.getNode()->hasOneUse()) {
1907 // Search for the smallest integer type with free casts to and from
1908 // Op's type. For expedience, just check power-of-2 integer types.
1909 unsigned DemandedSize = DemandedBits.getActiveBits();
1910 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1911 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1912 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1913 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1914 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1915 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1916 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1917 assert(DemandedSize <= SmallVTBits &&
1918 "Narrowed below demanded bits?");
1919 // We found a type with free casts.
1920 SDValue NarrowShl = TLO.DAG.getNode(
1921 ISD::SHL, dl, SmallVT,
1922 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1923 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1924 return TLO.CombineTo(
1925 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1926 }
1927 }
1928 }
1929
1930 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1931 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1932 // Only do this if we demand the upper half so the knownbits are correct.
1933 unsigned HalfWidth = BitWidth / 2;
1934 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1935 DemandedBits.countLeadingOnes() >= HalfWidth) {
1936 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1937 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1938 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1939 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1940 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1941 // If we're demanding the upper bits at all, we must ensure
1942 // that the upper bits of the shift result are known to be zero,
1943 // which is equivalent to the narrow shift being NUW.
1944 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1945 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1946 SDNodeFlags Flags;
1947 Flags.setNoSignedWrap(IsNSW);
1948 Flags.setNoUnsignedWrap(IsNUW);
1949 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1950 SDValue NewShiftAmt =
1951 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1952 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1953 NewShiftAmt, Flags);
1954 SDValue NewExt =
1955 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1956 return TLO.CombineTo(Op, NewExt);
1957 }
1958 }
1959 }
1960 } else {
1961 // This is a variable shift, so we can't shift the demand mask by a known
1962 // amount. But if we are not demanding high bits, then we are not
1963 // demanding those bits from the pre-shifted operand either.
1964 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1965 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1966 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1967 Depth + 1)) {
1968 // Disable the nsw and nuw flags. We can no longer guarantee that we
1969 // won't wrap after simplification.
1970 Op->dropFlags(SDNodeFlags::NoWrap);
1971 return true;
1972 }
1973 Known.resetAll();
1974 }
1975 }
1976
1977 // If we are only demanding sign bits then we can use the shift source
1978 // directly.
1979 if (std::optional<unsigned> MaxSA =
1980 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1981 unsigned ShAmt = *MaxSA;
1982 unsigned NumSignBits =
1983 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1984 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1985 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1986 return TLO.CombineTo(Op, Op0);
1987 }
1988 break;
1989 }
1990 case ISD::SRL: {
1991 SDValue Op0 = Op.getOperand(0);
1992 SDValue Op1 = Op.getOperand(1);
1993 EVT ShiftVT = Op1.getValueType();
1994
1995 if (std::optional<unsigned> KnownSA =
1996 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1997 unsigned ShAmt = *KnownSA;
1998 if (ShAmt == 0)
1999 return TLO.CombineTo(Op, Op0);
2000
2001 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
2002 // single shift. We can do this if the top bits (which are shifted out)
2003 // are never demanded.
2004 // TODO - support non-uniform vector amounts.
2005 if (Op0.getOpcode() == ISD::SHL) {
2006 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2007 if (std::optional<unsigned> InnerSA =
2008 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2009 unsigned C1 = *InnerSA;
2010 unsigned Opc = ISD::SRL;
2011 int Diff = ShAmt - C1;
2012 if (Diff < 0) {
2013 Diff = -Diff;
2014 Opc = ISD::SHL;
2015 }
2016 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
2017 return TLO.CombineTo(
2018 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
2019 }
2020 }
2021 }
2022
2023 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2024 // single sra. We can do this if the top bits are never demanded.
2025 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2026 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2027 if (std::optional<unsigned> InnerSA =
2028 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2029 unsigned C1 = *InnerSA;
2030 // Clamp the combined shift amount if it exceeds the bit width.
2031 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2032 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2033 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2034 Op0.getOperand(0), NewSA));
2035 }
2036 }
2037 }
2038
2039 APInt InDemandedMask = (DemandedBits << ShAmt);
2040
2041 // If the shift is exact, then it does demand the low bits (and knows that
2042 // they are zero).
2043 if (Op->getFlags().hasExact())
2044 InDemandedMask.setLowBits(ShAmt);
2045
2046 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2047 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2048 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2050 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2051 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2052 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2053 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2054 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2055 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2056 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2057 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2058 SDValue NewShiftAmt =
2059 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2060 SDValue NewShift =
2061 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2062 return TLO.CombineTo(
2063 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2064 }
2065 }
2066
2067 // Compute the new bits that are at the top now.
2068 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2069 Depth + 1))
2070 return true;
2071 Known >>= ShAmt;
2072 // High bits known zero.
2073 Known.Zero.setHighBits(ShAmt);
2074
2075 // Attempt to avoid multi-use ops if we don't need anything from them.
2076 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2078 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2079 if (DemandedOp0) {
2080 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2081 return TLO.CombineTo(Op, NewOp);
2082 }
2083 }
2084 } else {
2085 // Use generic knownbits computation as it has support for non-uniform
2086 // shift amounts.
2087 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2088 }
2089
2090 // If we are only demanding sign bits then we can use the shift source
2091 // directly.
2092 if (std::optional<unsigned> MaxSA =
2093 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2094 unsigned ShAmt = *MaxSA;
2095 // Must already be signbits in DemandedBits bounds, and can't demand any
2096 // shifted in zeroes.
2097 if (DemandedBits.countl_zero() >= ShAmt) {
2098 unsigned NumSignBits =
2099 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2100 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2101 return TLO.CombineTo(Op, Op0);
2102 }
2103 }
2104
2105 // Try to match AVG patterns (after shift simplification).
2106 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2107 DemandedElts, Depth + 1))
2108 return TLO.CombineTo(Op, AVG);
2109
2110 break;
2111 }
2112 case ISD::SRA: {
2113 SDValue Op0 = Op.getOperand(0);
2114 SDValue Op1 = Op.getOperand(1);
2115 EVT ShiftVT = Op1.getValueType();
2116
2117 // If we only want bits that already match the signbit then we don't need
2118 // to shift.
2119 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2120 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2121 NumHiDemandedBits)
2122 return TLO.CombineTo(Op, Op0);
2123
2124 // If this is an arithmetic shift right and only the low-bit is set, we can
2125 // always convert this into a logical shr, even if the shift amount is
2126 // variable. The low bit of the shift cannot be an input sign bit unless
2127 // the shift amount is >= the size of the datatype, which is undefined.
2128 if (DemandedBits.isOne())
2129 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2130
2131 if (std::optional<unsigned> KnownSA =
2132 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2133 unsigned ShAmt = *KnownSA;
2134 if (ShAmt == 0)
2135 return TLO.CombineTo(Op, Op0);
2136
2137 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2138 // supports sext_inreg.
2139 if (Op0.getOpcode() == ISD::SHL) {
2140 if (std::optional<unsigned> InnerSA =
2141 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2142 unsigned LowBits = BitWidth - ShAmt;
2143 EVT ExtVT = VT.changeElementType(
2144 *TLO.DAG.getContext(),
2145 EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits));
2146
2147 if (*InnerSA == ShAmt) {
2148 if (!TLO.LegalOperations() ||
2150 return TLO.CombineTo(
2151 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2152 Op0.getOperand(0),
2153 TLO.DAG.getValueType(ExtVT)));
2154
2155 // Even if we can't convert to sext_inreg, we might be able to
2156 // remove this shift pair if the input is already sign extended.
2157 unsigned NumSignBits =
2158 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2159 if (NumSignBits > ShAmt)
2160 return TLO.CombineTo(Op, Op0.getOperand(0));
2161 }
2162 }
2163 }
2164
2165 APInt InDemandedMask = (DemandedBits << ShAmt);
2166
2167 // If the shift is exact, then it does demand the low bits (and knows that
2168 // they are zero).
2169 if (Op->getFlags().hasExact())
2170 InDemandedMask.setLowBits(ShAmt);
2171
2172 // If any of the demanded bits are produced by the sign extension, we also
2173 // demand the input sign bit.
2174 if (DemandedBits.countl_zero() < ShAmt)
2175 InDemandedMask.setSignBit();
2176
2177 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2178 Depth + 1))
2179 return true;
2180 Known >>= ShAmt;
2181
2182 // If the input sign bit is known to be zero, or if none of the top bits
2183 // are demanded, turn this into an unsigned shift right.
2184 if (Known.Zero[BitWidth - ShAmt - 1] ||
2185 DemandedBits.countl_zero() >= ShAmt) {
2186 SDNodeFlags Flags;
2187 Flags.setExact(Op->getFlags().hasExact());
2188 return TLO.CombineTo(
2189 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2190 }
2191
2192 int Log2 = DemandedBits.exactLogBase2();
2193 if (Log2 >= 0) {
2194 // The bit must come from the sign.
2195 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2196 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2197 }
2198
2199 if (Known.One[BitWidth - ShAmt - 1])
2200 // New bits are known one.
2201 Known.One.setHighBits(ShAmt);
2202
2203 // Attempt to avoid multi-use ops if we don't need anything from them.
2204 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2206 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2207 if (DemandedOp0) {
2208 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2209 return TLO.CombineTo(Op, NewOp);
2210 }
2211 }
2212 }
2213
2214 // Try to match AVG patterns (after shift simplification).
2215 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2216 DemandedElts, Depth + 1))
2217 return TLO.CombineTo(Op, AVG);
2218
2219 break;
2220 }
2221 case ISD::FSHL:
2222 case ISD::FSHR: {
2223 SDValue Op0 = Op.getOperand(0);
2224 SDValue Op1 = Op.getOperand(1);
2225 SDValue Op2 = Op.getOperand(2);
2226 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2227
2228 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2229 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2230
2231 // For fshl, 0-shift returns the 1st arg.
2232 // For fshr, 0-shift returns the 2nd arg.
2233 if (Amt == 0) {
2234 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2235 Known, TLO, Depth + 1))
2236 return true;
2237 break;
2238 }
2239
2240 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2241 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2242 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2243 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2244 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2245 Depth + 1))
2246 return true;
2247 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2248 Depth + 1))
2249 return true;
2250
2251 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2252 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2253 Known = Known.unionWith(Known2);
2254
2255 // Attempt to avoid multi-use ops if we don't need anything from them.
2256 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2257 !DemandedElts.isAllOnes()) {
2259 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2261 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2262 if (DemandedOp0 || DemandedOp1) {
2263 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2264 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2265 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2266 DemandedOp1, Op2);
2267 return TLO.CombineTo(Op, NewOp);
2268 }
2269 }
2270 }
2271
2272 if (isPowerOf2_32(BitWidth)) {
2273 // Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
2274 // iff we're guaranteed not to use Op0.
2275 // TODO: Add FSHL equivalent?
2276 if (!IsFSHL && !DemandedBits.isAllOnes() &&
2277 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
2278 KnownBits KnownAmt =
2279 TLO.DAG.computeKnownBits(Op2, DemandedElts, Depth + 1);
2280 unsigned MaxShiftAmt =
2281 KnownAmt.getMaxValue().getLimitedValue(BitWidth - 1);
2282 // Check we don't demand any shifted bits outside Op1.
2283 if (DemandedBits.countl_zero() >= MaxShiftAmt) {
2284 EVT AmtVT = Op2.getValueType();
2285 SDValue NewAmt =
2286 TLO.DAG.getNode(ISD::AND, dl, AmtVT, Op2,
2287 TLO.DAG.getConstant(BitWidth - 1, dl, AmtVT));
2288 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, Op1, NewAmt);
2289 return TLO.CombineTo(Op, NewOp);
2290 }
2291 }
2292
2293 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2294 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2295 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, Known2, TLO,
2296 Depth + 1))
2297 return true;
2298 }
2299 break;
2300 }
2301 case ISD::ROTL:
2302 case ISD::ROTR: {
2303 SDValue Op0 = Op.getOperand(0);
2304 SDValue Op1 = Op.getOperand(1);
2305 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2306
2307 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2308 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2309 return TLO.CombineTo(Op, Op0);
2310
2311 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2312 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2313 unsigned RevAmt = BitWidth - Amt;
2314
2315 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2316 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2317 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2318 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2319 Depth + 1))
2320 return true;
2321
2322 // rot*(x, 0) --> x
2323 if (Amt == 0)
2324 return TLO.CombineTo(Op, Op0);
2325
2326 // See if we don't demand either half of the rotated bits.
2327 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2328 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2329 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2330 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2331 }
2332 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2333 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2334 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2335 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2336 }
2337 }
2338
2339 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2340 if (isPowerOf2_32(BitWidth)) {
2341 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2342 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2343 Depth + 1))
2344 return true;
2345 }
2346 break;
2347 }
2348 case ISD::SMIN:
2349 case ISD::SMAX:
2350 case ISD::UMIN:
2351 case ISD::UMAX: {
2352 unsigned Opc = Op.getOpcode();
2353 SDValue Op0 = Op.getOperand(0);
2354 SDValue Op1 = Op.getOperand(1);
2355
2356 // If we're only demanding signbits, then we can simplify to OR/AND node.
2357 unsigned BitOp =
2358 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2359 unsigned NumSignBits =
2360 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2361 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2362 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2363 if (NumSignBits >= NumDemandedUpperBits)
2364 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2365
2366 // Check if one arg is always less/greater than (or equal) to the other arg.
2367 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2368 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2369 switch (Opc) {
2370 case ISD::SMIN:
2371 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2372 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2373 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2374 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2375 Known = KnownBits::smin(Known0, Known1);
2376 break;
2377 case ISD::SMAX:
2378 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2379 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2380 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2381 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2382 Known = KnownBits::smax(Known0, Known1);
2383 break;
2384 case ISD::UMIN:
2385 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2386 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2387 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2388 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2389 Known = KnownBits::umin(Known0, Known1);
2390 break;
2391 case ISD::UMAX:
2392 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2393 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2394 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2395 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2396 Known = KnownBits::umax(Known0, Known1);
2397 break;
2398 }
2399 break;
2400 }
2401 case ISD::BITREVERSE: {
2402 SDValue Src = Op.getOperand(0);
2403 APInt DemandedSrcBits = DemandedBits.reverseBits();
2404 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2405 Depth + 1))
2406 return true;
2407 Known = Known2.reverseBits();
2408 break;
2409 }
2410 case ISD::BSWAP: {
2411 SDValue Src = Op.getOperand(0);
2412
2413 // If the only bits demanded come from one byte of the bswap result,
2414 // just shift the input byte into position to eliminate the bswap.
2415 unsigned NLZ = DemandedBits.countl_zero();
2416 unsigned NTZ = DemandedBits.countr_zero();
2417
2418 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2419 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2420 // have 14 leading zeros, round to 8.
2421 NLZ = alignDown(NLZ, 8);
2422 NTZ = alignDown(NTZ, 8);
2423 // If we need exactly one byte, we can do this transformation.
2424 if (BitWidth - NLZ - NTZ == 8) {
2425 // Replace this with either a left or right shift to get the byte into
2426 // the right place.
2427 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2428 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2429 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2430 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2431 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2432 return TLO.CombineTo(Op, NewOp);
2433 }
2434 }
2435
2436 APInt DemandedSrcBits = DemandedBits.byteSwap();
2437 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2438 Depth + 1))
2439 return true;
2440 Known = Known2.byteSwap();
2441 break;
2442 }
2443 case ISD::CTPOP: {
2444 // If only 1 bit is demanded, replace with PARITY as long as we're before
2445 // op legalization.
2446 // FIXME: Limit to scalars for now.
2447 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2448 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2449 Op.getOperand(0)));
2450
2451 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2452 break;
2453 }
2455 SDValue Op0 = Op.getOperand(0);
2456 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2457 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2458
2459 // If we only care about the highest bit, don't bother shifting right.
2460 if (DemandedBits.isSignMask()) {
2461 unsigned MinSignedBits =
2462 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2463 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2464 // However if the input is already sign extended we expect the sign
2465 // extension to be dropped altogether later and do not simplify.
2466 if (!AlreadySignExtended) {
2467 // Compute the correct shift amount type, which must be getShiftAmountTy
2468 // for scalar types after legalization.
2469 SDValue ShiftAmt =
2470 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2471 return TLO.CombineTo(Op,
2472 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2473 }
2474 }
2475
2476 // If none of the extended bits are demanded, eliminate the sextinreg.
2477 if (DemandedBits.getActiveBits() <= ExVTBits)
2478 return TLO.CombineTo(Op, Op0);
2479
2480 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2481
2482 // Since the sign extended bits are demanded, we know that the sign
2483 // bit is demanded.
2484 InputDemandedBits.setBit(ExVTBits - 1);
2485
2486 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2487 Depth + 1))
2488 return true;
2489
2490 // If the sign bit of the input is known set or clear, then we know the
2491 // top bits of the result.
2492
2493 // If the input sign bit is known zero, convert this into a zero extension.
2494 if (Known.Zero[ExVTBits - 1])
2495 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2496
2497 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2498 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2499 Known.One.setBitsFrom(ExVTBits);
2500 Known.Zero &= Mask;
2501 } else { // Input sign bit unknown
2502 Known.Zero &= Mask;
2503 Known.One &= Mask;
2504 }
2505 break;
2506 }
2507 case ISD::BUILD_PAIR: {
2508 EVT HalfVT = Op.getOperand(0).getValueType();
2509 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2510
2511 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2512 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2513
2514 KnownBits KnownLo, KnownHi;
2515
2516 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2517 return true;
2518
2519 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2520 return true;
2521
2522 Known = KnownHi.concat(KnownLo);
2523 break;
2524 }
2526 if (VT.isScalableVector())
2527 return false;
2528 [[fallthrough]];
2529 case ISD::ZERO_EXTEND: {
2530 SDValue Src = Op.getOperand(0);
2531 EVT SrcVT = Src.getValueType();
2532 unsigned InBits = SrcVT.getScalarSizeInBits();
2533 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2534 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2535
2536 // If none of the top bits are demanded, convert this into an any_extend.
2537 if (DemandedBits.getActiveBits() <= InBits) {
2538 // If we only need the non-extended bits of the bottom element
2539 // then we can just bitcast to the result.
2540 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2541 VT.getSizeInBits() == SrcVT.getSizeInBits())
2542 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2543
2544 unsigned Opc =
2546 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2547 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2548 }
2549
2550 APInt InDemandedBits = DemandedBits.trunc(InBits);
2551 APInt InDemandedElts = DemandedElts.zext(InElts);
2552 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2553 Depth + 1)) {
2554 Op->dropFlags(SDNodeFlags::NonNeg);
2555 return true;
2556 }
2557 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2558 Known = Known.zext(BitWidth);
2559
2560 // Attempt to avoid multi-use ops if we don't need anything from them.
2562 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2563 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2564 break;
2565 }
2567 if (VT.isScalableVector())
2568 return false;
2569 [[fallthrough]];
2570 case ISD::SIGN_EXTEND: {
2571 SDValue Src = Op.getOperand(0);
2572 EVT SrcVT = Src.getValueType();
2573 unsigned InBits = SrcVT.getScalarSizeInBits();
2574 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2575 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2576
2577 APInt InDemandedElts = DemandedElts.zext(InElts);
2578 APInt InDemandedBits = DemandedBits.trunc(InBits);
2579
2580 // Since some of the sign extended bits are demanded, we know that the sign
2581 // bit is demanded.
2582 InDemandedBits.setBit(InBits - 1);
2583
2584 // If none of the top bits are demanded, convert this into an any_extend.
2585 if (DemandedBits.getActiveBits() <= InBits) {
2586 // If we only need the non-extended bits of the bottom element
2587 // then we can just bitcast to the result.
2588 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2589 VT.getSizeInBits() == SrcVT.getSizeInBits())
2590 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2591
2592 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2594 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2595 InBits) {
2596 unsigned Opc =
2598 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2599 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2600 }
2601 }
2602
2603 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2604 Depth + 1))
2605 return true;
2606 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2607
2608 // If the sign bit is known one, the top bits match.
2609 Known = Known.sext(BitWidth);
2610
2611 // If the sign bit is known zero, convert this to a zero extend.
2612 if (Known.isNonNegative()) {
2613 unsigned Opc =
2615 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2616 SDNodeFlags Flags;
2617 if (!IsVecInReg)
2618 Flags |= SDNodeFlags::NonNeg;
2619 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2620 }
2621 }
2622
2623 // Attempt to avoid multi-use ops if we don't need anything from them.
2625 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2626 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2627 break;
2628 }
2630 if (VT.isScalableVector())
2631 return false;
2632 [[fallthrough]];
2633 case ISD::ANY_EXTEND: {
2634 SDValue Src = Op.getOperand(0);
2635 EVT SrcVT = Src.getValueType();
2636 unsigned InBits = SrcVT.getScalarSizeInBits();
2637 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2638 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2639
2640 // If we only need the bottom element then we can just bitcast.
2641 // TODO: Handle ANY_EXTEND?
2642 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2643 VT.getSizeInBits() == SrcVT.getSizeInBits())
2644 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2645
2646 APInt InDemandedBits = DemandedBits.trunc(InBits);
2647 APInt InDemandedElts = DemandedElts.zext(InElts);
2648 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2649 Depth + 1))
2650 return true;
2651 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2652 Known = Known.anyext(BitWidth);
2653
2654 // Attempt to avoid multi-use ops if we don't need anything from them.
2656 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2657 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2658 break;
2659 }
2660 case ISD::TRUNCATE: {
2661 SDValue Src = Op.getOperand(0);
2662
2663 // Simplify the input, using demanded bit information, and compute the known
2664 // zero/one bits live out.
2665 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2666 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2667 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2668 Depth + 1)) {
2669 // Disable the nsw and nuw flags. We can no longer guarantee that we
2670 // won't wrap after simplification.
2671 Op->dropFlags(SDNodeFlags::NoWrap);
2672 return true;
2673 }
2674 Known = Known.trunc(BitWidth);
2675
2676 // Attempt to avoid multi-use ops if we don't need anything from them.
2678 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2679 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2680
2681 // If the input is only used by this truncate, see if we can shrink it based
2682 // on the known demanded bits.
2683 switch (Src.getOpcode()) {
2684 default:
2685 break;
2686 case ISD::SRL:
2687 // Shrink SRL by a constant if none of the high bits shifted in are
2688 // demanded.
2689 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2690 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2691 // undesirable.
2692 break;
2693
2694 if (Src.getNode()->hasOneUse()) {
2695 if (isTruncateFree(Src, VT) &&
2696 !isTruncateFree(Src.getValueType(), VT)) {
2697 // If truncate is only free at trunc(srl), do not turn it into
2698 // srl(trunc). The check is done by first check the truncate is free
2699 // at Src's opcode(srl), then check the truncate is not done by
2700 // referencing sub-register. In test, if both trunc(srl) and
2701 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2702 // trunc(srl)'s trunc is free, trunc(srl) is better.
2703 break;
2704 }
2705
2706 std::optional<unsigned> ShAmtC =
2707 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2708 if (!ShAmtC || *ShAmtC >= BitWidth)
2709 break;
2710 unsigned ShVal = *ShAmtC;
2711
2712 APInt HighBits =
2713 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2714 HighBits.lshrInPlace(ShVal);
2715 HighBits = HighBits.trunc(BitWidth);
2716 if (!(HighBits & DemandedBits)) {
2717 // None of the shifted in bits are needed. Add a truncate of the
2718 // shift input, then shift it.
2719 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2720 SDValue NewTrunc =
2721 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2722 return TLO.CombineTo(
2723 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2724 }
2725 }
2726 break;
2727 }
2728
2729 break;
2730 }
2731 case ISD::AssertZext: {
2732 // AssertZext demands all of the high bits, plus any of the low bits
2733 // demanded by its users.
2734 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2736 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2737 TLO, Depth + 1))
2738 return true;
2739
2740 Known.Zero |= ~InMask;
2741 Known.One &= (~Known.Zero);
2742 break;
2743 }
2745 SDValue Src = Op.getOperand(0);
2746 SDValue Idx = Op.getOperand(1);
2747 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2748 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2749
2750 if (SrcEltCnt.isScalable())
2751 return false;
2752
2753 // Demand the bits from every vector element without a constant index.
2754 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2755 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2756 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2757 if (CIdx->getAPIntValue().ult(NumSrcElts))
2758 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2759
2760 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2761 // anything about the extended bits.
2762 APInt DemandedSrcBits = DemandedBits;
2763 if (BitWidth > EltBitWidth)
2764 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2765
2766 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2767 Depth + 1))
2768 return true;
2769
2770 // Attempt to avoid multi-use ops if we don't need anything from them.
2771 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2772 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2773 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2774 SDValue NewOp =
2775 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2776 return TLO.CombineTo(Op, NewOp);
2777 }
2778 }
2779
2780 Known = Known2;
2781 if (BitWidth > EltBitWidth)
2782 Known = Known.anyext(BitWidth);
2783 break;
2784 }
2785 case ISD::BITCAST: {
2786 if (VT.isScalableVector())
2787 return false;
2788 SDValue Src = Op.getOperand(0);
2789 EVT SrcVT = Src.getValueType();
2790 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2791
2792 // If this is an FP->Int bitcast and if the sign bit is the only
2793 // thing demanded, turn this into a FGETSIGN.
2794 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2795 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2796 SrcVT.isFloatingPoint()) {
2798 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2799 // place. We expect the SHL to be eliminated by other optimizations.
2800 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, VT, Src);
2801 unsigned ShVal = Op.getValueSizeInBits() - 1;
2802 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2803 return TLO.CombineTo(Op,
2804 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2805 }
2806 }
2807
2808 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2809 // Demand the elt/bit if any of the original elts/bits are demanded.
2810 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2811 unsigned Scale = BitWidth / NumSrcEltBits;
2812 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2813 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2814 for (unsigned i = 0; i != Scale; ++i) {
2815 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2816 unsigned BitOffset = EltOffset * NumSrcEltBits;
2817 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2818 }
2819 // Recursive calls below may turn not demanded elements into poison, so we
2820 // need to demand all smaller source elements that maps to a demanded
2821 // destination element.
2822 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2823
2824 APInt KnownSrcUndef, KnownSrcZero;
2825 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2826 KnownSrcZero, TLO, Depth + 1))
2827 return true;
2828
2829 KnownBits KnownSrcBits;
2830 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2831 KnownSrcBits, TLO, Depth + 1))
2832 return true;
2833 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2834 // TODO - bigendian once we have test coverage.
2835 unsigned Scale = NumSrcEltBits / BitWidth;
2836 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2837 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2838 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2839 for (unsigned i = 0; i != NumElts; ++i)
2840 if (DemandedElts[i]) {
2841 unsigned Offset = (i % Scale) * BitWidth;
2842 DemandedSrcBits.insertBits(DemandedBits, Offset);
2843 DemandedSrcElts.setBit(i / Scale);
2844 }
2845
2846 if (SrcVT.isVector()) {
2847 APInt KnownSrcUndef, KnownSrcZero;
2848 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2849 KnownSrcZero, TLO, Depth + 1))
2850 return true;
2851 }
2852
2853 KnownBits KnownSrcBits;
2854 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2855 KnownSrcBits, TLO, Depth + 1))
2856 return true;
2857
2858 // Attempt to avoid multi-use ops if we don't need anything from them.
2859 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2860 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2861 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2862 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2863 return TLO.CombineTo(Op, NewOp);
2864 }
2865 }
2866 }
2867
2868 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2869 // recursive call where Known may be useful to the caller.
2870 if (Depth > 0) {
2871 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2872 return false;
2873 }
2874 break;
2875 }
2876 case ISD::MUL:
2877 if (DemandedBits.isPowerOf2()) {
2878 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2879 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2880 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2881 unsigned CTZ = DemandedBits.countr_zero();
2882 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2883 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2884 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2885 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2886 return TLO.CombineTo(Op, Shl);
2887 }
2888 }
2889 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2890 // X * X is odd iff X is odd.
2891 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2892 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2893 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2894 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2895 return TLO.CombineTo(Op, And1);
2896 }
2897 [[fallthrough]];
2898 case ISD::PTRADD:
2899 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2900 break;
2901 // PTRADD behaves like ADD if pointers are represented as integers.
2902 [[fallthrough]];
2903 case ISD::ADD:
2904 case ISD::SUB: {
2905 // Add, Sub, and Mul don't demand any bits in positions beyond that
2906 // of the highest bit demanded of them.
2907 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2908 SDNodeFlags Flags = Op.getNode()->getFlags();
2909 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2910 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2911 KnownBits KnownOp0, KnownOp1;
2912 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2913 const KnownBits &KnownRHS) {
2914 if (Op.getOpcode() == ISD::MUL)
2915 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2916 return Demanded;
2917 };
2918 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2919 Depth + 1) ||
2920 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2921 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2922 // See if the operation should be performed at a smaller bit width.
2924 // Disable the nsw and nuw flags. We can no longer guarantee that we
2925 // won't wrap after simplification.
2926 Op->dropFlags(SDNodeFlags::NoWrap);
2927 return true;
2928 }
2929
2930 // neg x with only low bit demanded is simply x.
2931 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2932 isNullConstant(Op0))
2933 return TLO.CombineTo(Op, Op1);
2934
2935 // Attempt to avoid multi-use ops if we don't need anything from them.
2936 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2938 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2940 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2941 if (DemandedOp0 || DemandedOp1) {
2942 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2943 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2944 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2945 Flags & ~SDNodeFlags::NoWrap);
2946 return TLO.CombineTo(Op, NewOp);
2947 }
2948 }
2949
2950 // If we have a constant operand, we may be able to turn it into -1 if we
2951 // do not demand the high bits. This can make the constant smaller to
2952 // encode, allow more general folding, or match specialized instruction
2953 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2954 // is probably not useful (and could be detrimental).
2956 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2957 if (C && !C->isAllOnes() && !C->isOne() &&
2958 (C->getAPIntValue() | HighMask).isAllOnes()) {
2959 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2960 // Disable the nsw and nuw flags. We can no longer guarantee that we
2961 // won't wrap after simplification.
2962 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2963 Flags & ~SDNodeFlags::NoWrap);
2964 return TLO.CombineTo(Op, NewOp);
2965 }
2966
2967 // Match a multiply with a disguised negated-power-of-2 and convert to a
2968 // an equivalent shift-left amount.
2969 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2970 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2971 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2972 return 0;
2973
2974 // Don't touch opaque constants. Also, ignore zero and power-of-2
2975 // multiplies. Those will get folded later.
2976 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2977 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2978 !MulC->getAPIntValue().isPowerOf2()) {
2979 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2980 if (UnmaskedC.isNegatedPowerOf2())
2981 return (-UnmaskedC).logBase2();
2982 }
2983 return 0;
2984 };
2985
2986 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2987 unsigned ShlAmt) {
2988 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2989 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2990 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2991 return TLO.CombineTo(Op, Res);
2992 };
2993
2995 if (Op.getOpcode() == ISD::ADD) {
2996 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2997 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2998 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2999 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
3000 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3001 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
3002 }
3003 if (Op.getOpcode() == ISD::SUB) {
3004 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
3005 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3006 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
3007 }
3008 }
3009
3010 if (Op.getOpcode() == ISD::MUL) {
3011 Known = KnownBits::mul(KnownOp0, KnownOp1);
3012 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
3014 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
3015 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
3016 }
3017 break;
3018 }
3019 case ISD::FABS: {
3020 SDValue Op0 = Op.getOperand(0);
3021 APInt SignMask = APInt::getSignMask(BitWidth);
3022
3023 if (!DemandedBits.intersects(SignMask))
3024 return TLO.CombineTo(Op, Op0);
3025
3026 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3027 Depth + 1))
3028 return true;
3029
3030 if (Known.isNonNegative())
3031 return TLO.CombineTo(Op, Op0);
3032 if (Known.isNegative())
3033 return TLO.CombineTo(
3034 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3035
3036 Known.Zero |= SignMask;
3037 Known.One &= ~SignMask;
3038
3039 break;
3040 }
3041 case ISD::FCOPYSIGN: {
3042 SDValue Op0 = Op.getOperand(0);
3043 SDValue Op1 = Op.getOperand(1);
3044
3045 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3046 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3047 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3048 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3049
3050 if (!DemandedBits.intersects(SignMask0))
3051 return TLO.CombineTo(Op, Op0);
3052
3053 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3054 Known, TLO, Depth + 1) ||
3055 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3056 Depth + 1))
3057 return true;
3058
3059 if (Known2.isNonNegative())
3060 return TLO.CombineTo(
3061 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3062
3063 if (Known2.isNegative())
3064 return TLO.CombineTo(
3065 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3066 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3067
3068 Known.Zero &= ~SignMask0;
3069 Known.One &= ~SignMask0;
3070 break;
3071 }
3072 case ISD::FNEG: {
3073 SDValue Op0 = Op.getOperand(0);
3074 APInt SignMask = APInt::getSignMask(BitWidth);
3075
3076 if (!DemandedBits.intersects(SignMask))
3077 return TLO.CombineTo(Op, Op0);
3078
3079 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3080 Depth + 1))
3081 return true;
3082
3083 if (!Known.isSignUnknown()) {
3084 Known.Zero ^= SignMask;
3085 Known.One ^= SignMask;
3086 }
3087
3088 break;
3089 }
3090 default:
3091 // We also ask the target about intrinsics (which could be specific to it).
3092 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3093 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3094 // TODO: Probably okay to remove after audit; here to reduce change size
3095 // in initial enablement patch for scalable vectors
3096 if (Op.getValueType().isScalableVector())
3097 break;
3099 Known, TLO, Depth))
3100 return true;
3101 break;
3102 }
3103
3104 // Just use computeKnownBits to compute output bits.
3105 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3106 break;
3107 }
3108
3109 // If we know the value of all of the demanded bits, return this as a
3110 // constant.
3112 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3113 // Avoid folding to a constant if any OpaqueConstant is involved.
3114 if (llvm::any_of(Op->ops(), [](SDValue V) {
3115 auto *C = dyn_cast<ConstantSDNode>(V);
3116 return C && C->isOpaque();
3117 }))
3118 return false;
3119 if (VT.isInteger())
3120 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3121 if (VT.isFloatingPoint())
3122 return TLO.CombineTo(
3123 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3124 dl, VT));
3125 }
3126
3127 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3128 // Try again just for the original demanded elts.
3129 // Ensure we do this AFTER constant folding above.
3130 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3131 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3132
3133 return false;
3134}
3135
3137 const APInt &DemandedElts,
3138 DAGCombinerInfo &DCI) const {
// Convenience wrapper used from DAGCombiner: run the demanded-vector-elements
// simplification on Op and, on success, commit the queued replacements back
// through the combiner.
3139 SelectionDAG &DAG = DCI.DAG;
// The legality flags track the current combine phase so the simplification
// does not introduce illegal types or operations once legalization has run.
3140 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3141 !DCI.isBeforeLegalizeOps());
3142
// KnownUndef/KnownZero are computed by the analysis but unused by this caller.
3143 APInt KnownUndef, KnownZero;
3144 bool Simplified =
3145 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3146 if (Simplified) {
// Revisit the node and apply the replacements recorded in TLO.
3147 DCI.AddToWorklist(Op.getNode());
3148 DCI.CommitTargetLoweringOpt(TLO);
3149 }
3150
3151 return Simplified;
3152}
3153
3154/// Given a vector binary operation and known undefined elements for each input
3155/// operand, compute whether each element of the output is undefined.
/// \p UndefOp0 and \p UndefOp1 are per-element masks (bit i set means element
/// i of the corresponding operand is known undef). Returns the per-element
/// mask of output elements known to fold to undef.
3157 const APInt &UndefOp0,
3158 const APInt &UndefOp1) {
3159 EVT VT = BO.getValueType();
3161 "Vector binop only");
3162
3163 EVT EltVT = VT.getVectorElementType();
// Non-fixed-length (scalable) vectors are handled with a single-element mask.
3164 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3165 assert(UndefOp0.getBitWidth() == NumElts &&
3166 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3167
// Return element Index of V as an UNDEF value (if the mask says it is known
// undef) or as a foldable BUILD_VECTOR element (FP constant, undef, or
// non-opaque integer constant). A null SDValue means "cannot reason about
// this element".
3168 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3169 const APInt &UndefVals) {
3170 if (UndefVals[Index])
3171 return DAG.getUNDEF(EltVT);
3172
3173 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3174 // Try hard to make sure that the getNode() call is not creating temporary
3175 // nodes. Ignore opaque integers because they do not constant fold.
3176 SDValue Elt = BV->getOperand(Index);
3177 auto *C = dyn_cast<ConstantSDNode>(Elt);
3178 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3179 return Elt;
3180 }
3181
3182 return SDValue();
3183 };
3184
3185 APInt KnownUndef = APInt::getZero(NumElts);
3186 for (unsigned i = 0; i != NumElts; ++i) {
3187 // If both inputs for this element are either constant or undef and match
3188 // the element type, compute the constant/undef result for this element of
3189 // the vector.
3190 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3191 // not handle FP constants. The code within getNode() should be refactored
3192 // to avoid the danger of creating a bogus temporary node here.
3193 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3194 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
// getNode() may constant-fold the scalar op; if the folded result is undef,
// the corresponding output element is known undef.
3195 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3196 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3197 KnownUndef.setBit(i);
3198 }
3199 return KnownUndef;
3200}
3201
3203 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3204 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3205 bool AssumeSingleUse) const {
3206 EVT VT = Op.getValueType();
3207 unsigned Opcode = Op.getOpcode();
3208 APInt DemandedElts = OriginalDemandedElts;
3209 unsigned NumElts = DemandedElts.getBitWidth();
3210 assert(VT.isVector() && "Expected vector op");
3211
3212 KnownUndef = KnownZero = APInt::getZero(NumElts);
3213
3215 return false;
3216
3217 // TODO: For now we assume we know nothing about scalable vectors.
3218 if (VT.isScalableVector())
3219 return false;
3220
3221 assert(VT.getVectorNumElements() == NumElts &&
3222 "Mask size mismatches value type element count!");
3223
3224 // Undef operand.
3225 if (Op.isUndef()) {
3226 KnownUndef.setAllBits();
3227 return false;
3228 }
3229
3230 // If Op has other users, assume that all elements are needed.
3231 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3232 DemandedElts.setAllBits();
3233
3234 // Not demanding any elements from Op.
3235 if (DemandedElts == 0) {
3236 KnownUndef.setAllBits();
3237 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3238 }
3239
3240 // Limit search depth.
3242 return false;
3243
3244 SDLoc DL(Op);
3245 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3246 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3247
3248 // Helper for demanding the specified elements and all the bits of both binary
3249 // operands.
3250 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3251 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3252 TLO.DAG, Depth + 1);
3253 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3254 TLO.DAG, Depth + 1);
3255 if (NewOp0 || NewOp1) {
3256 SDValue NewOp =
3257 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3258 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3259 return TLO.CombineTo(Op, NewOp);
3260 }
3261 return false;
3262 };
3263
3264 switch (Opcode) {
3265 case ISD::SCALAR_TO_VECTOR: {
3266 if (!DemandedElts[0]) {
3267 KnownUndef.setAllBits();
3268 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3269 }
3270 KnownUndef.setHighBits(NumElts - 1);
3271 break;
3272 }
3273 case ISD::BITCAST: {
3274 SDValue Src = Op.getOperand(0);
3275 EVT SrcVT = Src.getValueType();
3276
3277 if (!SrcVT.isVector()) {
3278 // TODO - bigendian once we have test coverage.
3279 if (IsLE) {
3280 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3281 unsigned EltSize = VT.getScalarSizeInBits();
3282 for (unsigned I = 0; I != NumElts; ++I) {
3283 if (DemandedElts[I]) {
3284 unsigned Offset = I * EltSize;
3285 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3286 }
3287 }
3288 KnownBits Known;
3289 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3290 return true;
3291 }
3292 break;
3293 }
3294
3295 // Fast handling of 'identity' bitcasts.
3296 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3297 if (NumSrcElts == NumElts)
3298 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3299 KnownZero, TLO, Depth + 1);
3300
3301 APInt SrcDemandedElts, SrcZero, SrcUndef;
3302
3303 // Bitcast from 'large element' src vector to 'small element' vector, we
3304 // must demand a source element if any DemandedElt maps to it.
3305 if ((NumElts % NumSrcElts) == 0) {
3306 unsigned Scale = NumElts / NumSrcElts;
3307 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3308 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3309 TLO, Depth + 1))
3310 return true;
3311
3312 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3313 // of the large element.
3314 // TODO - bigendian once we have test coverage.
3315 if (IsLE) {
3316 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3317 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3318 for (unsigned i = 0; i != NumElts; ++i)
3319 if (DemandedElts[i]) {
3320 unsigned Ofs = (i % Scale) * EltSizeInBits;
3321 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3322 }
3323
3324 KnownBits Known;
3325 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3326 TLO, Depth + 1))
3327 return true;
3328
3329 // The bitcast has split each wide element into a number of
3330 // narrow subelements. We have just computed the Known bits
3331 // for wide elements. See if element splitting results in
3332 // some subelements being zero. Only for demanded elements!
3333 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3334 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3335 .isAllOnes())
3336 continue;
3337 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3338 unsigned Elt = Scale * SrcElt + SubElt;
3339 if (DemandedElts[Elt])
3340 KnownZero.setBit(Elt);
3341 }
3342 }
3343 }
3344
3345 // If the src element is zero/undef then all the output elements will be -
3346 // only demanded elements are guaranteed to be correct.
3347 for (unsigned i = 0; i != NumSrcElts; ++i) {
3348 if (SrcDemandedElts[i]) {
3349 if (SrcZero[i])
3350 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3351 if (SrcUndef[i])
3352 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3353 }
3354 }
3355 }
3356
3357 // Bitcast from 'small element' src vector to 'large element' vector, we
3358 // demand all smaller source elements covered by the larger demanded element
3359 // of this vector.
3360 if ((NumSrcElts % NumElts) == 0) {
3361 unsigned Scale = NumSrcElts / NumElts;
3362 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3363 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3364 TLO, Depth + 1))
3365 return true;
3366
3367 // If all the src elements covering an output element are zero/undef, then
3368 // the output element will be as well, assuming it was demanded.
3369 for (unsigned i = 0; i != NumElts; ++i) {
3370 if (DemandedElts[i]) {
3371 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3372 KnownZero.setBit(i);
3373 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3374 KnownUndef.setBit(i);
3375 }
3376 }
3377 }
3378 break;
3379 }
3380 case ISD::FREEZE: {
3381 SDValue N0 = Op.getOperand(0);
3383 N0, DemandedElts, UndefPoisonKind::UndefOrPoison, Depth + 1))
3384 return TLO.CombineTo(Op, N0);
3385
3386 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3387 // freeze(op(x, ...)) -> op(freeze(x), ...).
3388 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3389 return TLO.CombineTo(
3391 TLO.DAG.getFreeze(N0.getOperand(0))));
3392 break;
3393 }
3394 case ISD::BUILD_VECTOR: {
3395 // Check all elements and simplify any unused elements with UNDEF.
3396 if (!DemandedElts.isAllOnes()) {
3397 // Don't simplify BROADCASTS.
3398 if (llvm::any_of(Op->op_values(),
3399 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3401 bool Updated = false;
3402 for (unsigned i = 0; i != NumElts; ++i) {
3403 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3404 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3405 KnownUndef.setBit(i);
3406 Updated = true;
3407 }
3408 }
3409 if (Updated)
3410 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3411 }
3412 }
3413 for (unsigned i = 0; i != NumElts; ++i) {
3414 SDValue SrcOp = Op.getOperand(i);
3415 if (SrcOp.isUndef()) {
3416 KnownUndef.setBit(i);
3417 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3419 KnownZero.setBit(i);
3420 }
3421 }
3422 break;
3423 }
3424 case ISD::CONCAT_VECTORS: {
3425 EVT SubVT = Op.getOperand(0).getValueType();
3426 unsigned NumSubVecs = Op.getNumOperands();
3427 unsigned NumSubElts = SubVT.getVectorNumElements();
3428 for (unsigned i = 0; i != NumSubVecs; ++i) {
3429 SDValue SubOp = Op.getOperand(i);
3430 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3431 APInt SubUndef, SubZero;
3432 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3433 Depth + 1))
3434 return true;
3435 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3436 KnownZero.insertBits(SubZero, i * NumSubElts);
3437 }
3438
3439 // Attempt to avoid multi-use ops if we don't need anything from them.
3440 if (!DemandedElts.isAllOnes()) {
3441 bool FoundNewSub = false;
3442 SmallVector<SDValue, 2> DemandedSubOps;
3443 for (unsigned i = 0; i != NumSubVecs; ++i) {
3444 SDValue SubOp = Op.getOperand(i);
3445 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3447 SubOp, SubElts, TLO.DAG, Depth + 1);
3448 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3449 FoundNewSub = NewSubOp ? true : FoundNewSub;
3450 }
3451 if (FoundNewSub) {
3452 SDValue NewOp =
3453 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3454 return TLO.CombineTo(Op, NewOp);
3455 }
3456 }
3457 break;
3458 }
3459 case ISD::INSERT_SUBVECTOR: {
3460 // Demand any elements from the subvector and the remainder from the src it
3461 // is inserted into.
3462 SDValue Src = Op.getOperand(0);
3463 SDValue Sub = Op.getOperand(1);
3464 uint64_t Idx = Op.getConstantOperandVal(2);
3465 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3466 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3467 APInt DemandedSrcElts = DemandedElts;
3468 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3469
3470 // If none of the sub operand elements are demanded, bypass the insert.
3471 if (!DemandedSubElts)
3472 return TLO.CombineTo(Op, Src);
3473
3474 APInt SubUndef, SubZero;
3475 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3476 Depth + 1))
3477 return true;
3478
3479 // If none of the src operand elements are demanded, replace it with undef.
3480 if (!DemandedSrcElts && !Src.isUndef())
3481 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3482 TLO.DAG.getUNDEF(VT), Sub,
3483 Op.getOperand(2)));
3484
3485 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3486 TLO, Depth + 1))
3487 return true;
3488 KnownUndef.insertBits(SubUndef, Idx);
3489 KnownZero.insertBits(SubZero, Idx);
3490
3491 // Attempt to avoid multi-use ops if we don't need anything from them.
3492 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3494 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3496 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3497 if (NewSrc || NewSub) {
3498 NewSrc = NewSrc ? NewSrc : Src;
3499 NewSub = NewSub ? NewSub : Sub;
3500 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3501 NewSub, Op.getOperand(2));
3502 return TLO.CombineTo(Op, NewOp);
3503 }
3504 }
3505 break;
3506 }
3508 // Offset the demanded elts by the subvector index.
3509 SDValue Src = Op.getOperand(0);
3510 if (Src.getValueType().isScalableVector())
3511 break;
3512 uint64_t Idx = Op.getConstantOperandVal(1);
3513 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3514 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3515
3516 APInt SrcUndef, SrcZero;
3517 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3518 Depth + 1))
3519 return true;
3520 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3521 KnownZero = SrcZero.extractBits(NumElts, Idx);
3522
3523 // Attempt to avoid multi-use ops if we don't need anything from them.
3524 if (!DemandedElts.isAllOnes()) {
3526 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3527 if (NewSrc) {
3528 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3529 Op.getOperand(1));
3530 return TLO.CombineTo(Op, NewOp);
3531 }
3532 }
3533 break;
3534 }
3536 SDValue Vec = Op.getOperand(0);
3537 SDValue Scl = Op.getOperand(1);
3538 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3539
3540 // For a legal, constant insertion index, if we don't need this insertion
3541 // then strip it, else remove it from the demanded elts.
3542 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3543 unsigned Idx = CIdx->getZExtValue();
3544 if (!DemandedElts[Idx])
3545 return TLO.CombineTo(Op, Vec);
3546
3547 APInt DemandedVecElts(DemandedElts);
3548 DemandedVecElts.clearBit(Idx);
3549 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3550 KnownZero, TLO, Depth + 1))
3551 return true;
3552
3553 KnownUndef.setBitVal(Idx, Scl.isUndef());
3554
3555 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3556 break;
3557 }
3558
3559 APInt VecUndef, VecZero;
3560 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3561 Depth + 1))
3562 return true;
3563 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3564 break;
3565 }
3566 case ISD::VSELECT: {
3567 SDValue Sel = Op.getOperand(0);
3568 SDValue LHS = Op.getOperand(1);
3569 SDValue RHS = Op.getOperand(2);
3570
3571 // Try to transform the select condition based on the current demanded
3572 // elements.
3573 APInt UndefSel, ZeroSel;
3574 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3575 Depth + 1))
3576 return true;
3577
3578 // See if we can simplify either vselect operand.
3579 APInt DemandedLHS(DemandedElts);
3580 APInt DemandedRHS(DemandedElts);
3581 APInt UndefLHS, ZeroLHS;
3582 APInt UndefRHS, ZeroRHS;
3583 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3584 Depth + 1))
3585 return true;
3586 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3587 Depth + 1))
3588 return true;
3589
3590 KnownUndef = UndefLHS & UndefRHS;
3591 KnownZero = ZeroLHS & ZeroRHS;
3592
3593 // If we know that the selected element is always zero, we don't need the
3594 // select value element.
3595 APInt DemandedSel = DemandedElts & ~KnownZero;
3596 if (DemandedSel != DemandedElts)
3597 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3598 Depth + 1))
3599 return true;
3600
3601 break;
3602 }
3603 case ISD::VECTOR_SHUFFLE: {
3604 SDValue LHS = Op.getOperand(0);
3605 SDValue RHS = Op.getOperand(1);
3606 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3607
3608 // Collect demanded elements from shuffle operands..
3609 APInt DemandedLHS(NumElts, 0);
3610 APInt DemandedRHS(NumElts, 0);
3611 for (unsigned i = 0; i != NumElts; ++i) {
3612 int M = ShuffleMask[i];
3613 if (M < 0 || !DemandedElts[i])
3614 continue;
3615 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3616 if (M < (int)NumElts)
3617 DemandedLHS.setBit(M);
3618 else
3619 DemandedRHS.setBit(M - NumElts);
3620 }
3621
3622 // If either side isn't demanded, replace it by UNDEF. We handle this
3623 // explicitly here to also simplify in case of multiple uses (on the
3624 // contrary to the SimplifyDemandedVectorElts calls below).
3625 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3626 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3627 if (FoldLHS || FoldRHS) {
3628 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3629 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3630 SDValue NewOp =
3631 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3632 return TLO.CombineTo(Op, NewOp);
3633 }
3634
3635 // See if we can simplify either shuffle operand.
3636 APInt UndefLHS, ZeroLHS;
3637 APInt UndefRHS, ZeroRHS;
3638 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3639 Depth + 1))
3640 return true;
3641 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3642 Depth + 1))
3643 return true;
3644
3645 // Simplify mask using undef elements from LHS/RHS.
3646 bool Updated = false;
3647 bool IdentityLHS = true, IdentityRHS = true;
3648 SmallVector<int, 32> NewMask(ShuffleMask);
3649 for (unsigned i = 0; i != NumElts; ++i) {
3650 int &M = NewMask[i];
3651 if (M < 0)
3652 continue;
3653 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3654 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3655 Updated = true;
3656 M = -1;
3657 }
3658 IdentityLHS &= (M < 0) || (M == (int)i);
3659 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3660 }
3661
3662 // Update legal shuffle masks based on demanded elements if it won't reduce
3663 // to Identity which can cause premature removal of the shuffle mask.
3664 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3665 SDValue LegalShuffle =
3666 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3667 if (LegalShuffle)
3668 return TLO.CombineTo(Op, LegalShuffle);
3669 }
3670
3671 // Propagate undef/zero elements from LHS/RHS.
3672 for (unsigned i = 0; i != NumElts; ++i) {
3673 int M = ShuffleMask[i];
3674 if (M < 0) {
3675 KnownUndef.setBit(i);
3676 } else if (M < (int)NumElts) {
3677 if (UndefLHS[M])
3678 KnownUndef.setBit(i);
3679 if (ZeroLHS[M])
3680 KnownZero.setBit(i);
3681 } else {
3682 if (UndefRHS[M - NumElts])
3683 KnownUndef.setBit(i);
3684 if (ZeroRHS[M - NumElts])
3685 KnownZero.setBit(i);
3686 }
3687 }
3688 break;
3689 }
3693 APInt SrcUndef, SrcZero;
3694 SDValue Src = Op.getOperand(0);
3695 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3696 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3697 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3698 Depth + 1))
3699 return true;
3700 KnownZero = SrcZero.zextOrTrunc(NumElts);
3701 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3702
3703 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3704 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3705 DemandedSrcElts == 1) {
3706 // aext - if we just need the bottom element then we can bitcast.
3707 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3708 }
3709
3710 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3711 // zext(undef) upper bits are guaranteed to be zero.
3712 if (DemandedElts.isSubsetOf(KnownUndef))
3713 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3714 KnownUndef.clearAllBits();
3715
3716 // zext - if we just need the bottom element then we can mask:
3717 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3718 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3719 Op->isOnlyUserOf(Src.getNode()) &&
3720 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3721 SDLoc DL(Op);
3722 EVT SrcVT = Src.getValueType();
3723 EVT SrcSVT = SrcVT.getScalarType();
3724
3725 // If we're after type legalization and SrcSVT is not legal, use the
3726 // promoted type for creating constants to avoid creating nodes with
3727 // illegal types.
3729 SrcSVT = getLegalTypeToTransformTo(*TLO.DAG.getContext(), SrcSVT);
3730
3731 SmallVector<SDValue> MaskElts;
3732 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3733 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3734 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3735 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3736 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3737 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3738 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3739 }
3740 }
3741 }
3742 break;
3743 }
3744
3745 // TODO: There are more binop opcodes that could be handled here - MIN,
3746 // MAX, saturated math, etc.
3747 case ISD::ADD: {
3748 SDValue Op0 = Op.getOperand(0);
3749 SDValue Op1 = Op.getOperand(1);
3750 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3751 APInt UndefLHS, ZeroLHS;
3752 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3753 Depth + 1, /*AssumeSingleUse*/ true))
3754 return true;
3755 }
3756 [[fallthrough]];
3757 }
3758 case ISD::AVGCEILS:
3759 case ISD::AVGCEILU:
3760 case ISD::AVGFLOORS:
3761 case ISD::AVGFLOORU:
3762 case ISD::OR:
3763 case ISD::XOR:
3764 case ISD::SUB:
3765 case ISD::FADD:
3766 case ISD::FSUB:
3767 case ISD::FMUL:
3768 case ISD::FDIV:
3769 case ISD::FREM: {
3770 SDValue Op0 = Op.getOperand(0);
3771 SDValue Op1 = Op.getOperand(1);
3772
3773 APInt UndefRHS, ZeroRHS;
3774 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3775 Depth + 1))
3776 return true;
3777 APInt UndefLHS, ZeroLHS;
3778 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3779 Depth + 1))
3780 return true;
3781
3782 KnownZero = ZeroLHS & ZeroRHS;
3783 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3784
3785 // Attempt to avoid multi-use ops if we don't need anything from them.
3786 // TODO - use KnownUndef to relax the demandedelts?
3787 if (!DemandedElts.isAllOnes())
3788 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3789 return true;
3790 break;
3791 }
3792 case ISD::SHL:
3793 case ISD::SRL:
3794 case ISD::SRA:
3795 case ISD::ROTL:
3796 case ISD::ROTR: {
3797 SDValue Op0 = Op.getOperand(0);
3798 SDValue Op1 = Op.getOperand(1);
3799
3800 APInt UndefRHS, ZeroRHS;
3801 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3802 Depth + 1))
3803 return true;
3804 APInt UndefLHS, ZeroLHS;
3805 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3806 Depth + 1))
3807 return true;
3808
3809 KnownZero = ZeroLHS;
3810 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3811
3812 // Attempt to avoid multi-use ops if we don't need anything from them.
3813 // TODO - use KnownUndef to relax the demandedelts?
3814 if (!DemandedElts.isAllOnes())
3815 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3816 return true;
3817 break;
3818 }
3819 case ISD::MUL:
3820 case ISD::MULHU:
3821 case ISD::MULHS:
3822 case ISD::AND: {
3823 SDValue Op0 = Op.getOperand(0);
3824 SDValue Op1 = Op.getOperand(1);
3825
3826 APInt SrcUndef, SrcZero;
3827 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3828 Depth + 1))
3829 return true;
3830 // FIXME: If we know that a demanded element was zero in Op1 we don't need
3831 // to demand it in Op0 - its guaranteed to be zero. There is however a
3832 // restriction, as we must not make any of the originally demanded elements
3833 // more poisonous. We could reduce amount of elements demanded, but then we
3834 // also need a to inform SimplifyDemandedVectorElts that some elements must
3835 // not be made more poisonous.
3836 if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
3837 TLO, Depth + 1))
3838 return true;
3839
3840 KnownUndef &= DemandedElts;
3841 KnownZero &= DemandedElts;
3842
3843 // If every element pair has a zero/undef/poison then just fold to zero.
3844 // fold (and x, undef/poison) -> 0 / (and x, 0) -> 0
3845 // fold (mul x, undef/poison) -> 0 / (mul x, 0) -> 0
3846 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3847 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3848
3849 // If either side has a zero element, then the result element is zero, even
3850 // if the other is an UNDEF.
3851 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3852 // and then handle 'and' nodes with the rest of the binop opcodes.
3853 KnownZero |= SrcZero;
3854 KnownUndef &= SrcUndef;
3855 KnownUndef &= ~KnownZero;
3856
3857 // Attempt to avoid multi-use ops if we don't need anything from them.
3858 if (!DemandedElts.isAllOnes())
3859 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3860 return true;
3861 break;
3862 }
3863 case ISD::TRUNCATE:
3864 case ISD::SIGN_EXTEND:
3865 case ISD::ZERO_EXTEND:
3866 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3867 KnownZero, TLO, Depth + 1))
3868 return true;
3869
3870 if (!DemandedElts.isAllOnes())
3872 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3873 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3874
3875 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3876 // zext(undef) upper bits are guaranteed to be zero.
3877 if (DemandedElts.isSubsetOf(KnownUndef))
3878 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3879 KnownUndef.clearAllBits();
3880 }
3881 break;
3882 case ISD::SINT_TO_FP:
3883 case ISD::UINT_TO_FP:
3884 case ISD::FP_TO_SINT:
3885 case ISD::FP_TO_UINT:
3886 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3887 KnownZero, TLO, Depth + 1))
3888 return true;
3889 // Don't fall through to generic undef -> undef handling.
3890 return false;
3891 default: {
3892 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3893 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3894 KnownZero, TLO, Depth))
3895 return true;
3896 } else {
3897 KnownBits Known;
3898 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3899 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3900 TLO, Depth, AssumeSingleUse))
3901 return true;
3902 }
3903 break;
3904 }
3905 }
3906 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3907
3908 // Constant fold all undef cases.
3909 // TODO: Handle zero cases as well.
3910 if (DemandedElts.isSubsetOf(KnownUndef))
3911 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3912
3913 return false;
3914}
3915
3916/// Determine which of the bits specified in Mask are known to be either zero or
3917/// one and return them in the Known.
3919 KnownBits &Known,
3920 const APInt &DemandedElts,
3921 const SelectionDAG &DAG,
3922 unsigned Depth) const {
3923 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3924 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3925 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3926 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3927 "Should use MaskedValueIsZero if you don't know whether Op"
3928 " is a target node!");
3929 Known.resetAll();
3930}
3931
3934 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3935 unsigned Depth) const {
3936 Known.resetAll();
3937}
3938
3941 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3942 unsigned Depth) const {
3943 Known.resetAll();
3944}
3945
3947 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3948 // The low bits are known zero if the pointer is aligned.
3949 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3950}
3951
3957
3958/// This method can be implemented by targets that want to expose additional
3959/// information about sign bits to the DAG Combiner.
3961 const APInt &,
3962 const SelectionDAG &,
3963 unsigned Depth) const {
3964 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3965 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3966 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3967 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3968 "Should use ComputeNumSignBits if you don't know whether Op"
3969 " is a target node!");
3970 return 1;
3971}
3972
3974 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3975 const MachineRegisterInfo &MRI, unsigned Depth) const {
3976 return 1;
3977}
3978
3980 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3981 TargetLoweringOpt &TLO, unsigned Depth) const {
3982 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3983 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3984 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3985 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3986 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3987 " is a target node!");
3988 return false;
3989}
3990
3992 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3993 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3994 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3995 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3996 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3997 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3998 "Should use SimplifyDemandedBits if you don't know whether Op"
3999 " is a target node!");
4000 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
4001 return false;
4002}
4003
4005 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4006 SelectionDAG &DAG, unsigned Depth) const {
4007 assert(
4008 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4009 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4010 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4011 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4012 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
4013 " is a target node!");
4014 return SDValue();
4015}
4016
4017SDValue
4020 SelectionDAG &DAG) const {
4021 bool LegalMask = isShuffleMaskLegal(Mask, VT);
4022 if (!LegalMask) {
4023 std::swap(N0, N1);
4025 LegalMask = isShuffleMaskLegal(Mask, VT);
4026 }
4027
4028 if (!LegalMask)
4029 return SDValue();
4030
4031 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
4032}
4033
4035 return nullptr;
4036}
4037
4039 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4040 UndefPoisonKind Kind, unsigned Depth) const {
4041 assert(
4042 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4043 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4044 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4045 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4046 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4047 " is a target node!");
4048
4049 // If Op can't create undef/poison and none of its operands are undef/poison
4050 // then Op is never undef/poison.
4051 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, Kind,
4052 /*ConsiderFlags*/ true, Depth) &&
4053 all_of(Op->ops(), [&](SDValue V) {
4054 return DAG.isGuaranteedNotToBeUndefOrPoison(V, Kind, Depth + 1);
4055 });
4056}
4057
4059 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4060 UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const {
4061 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4062 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4063 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4064 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4065 "Should use canCreateUndefOrPoison if you don't know whether Op"
4066 " is a target node!");
4067 // Be conservative and return true.
4068 return true;
4069}
4070
4072 KnownFPClass &Known,
4073 const APInt &DemandedElts,
4074 const SelectionDAG &DAG,
4075 unsigned Depth) const {
4076 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4077 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4078 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4079 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4080 "Should use computeKnownFPClass if you don't know whether Op"
4081 " is a target node!");
4082}
4083
4085 const APInt &DemandedElts,
4086 const SelectionDAG &DAG,
4087 bool SNaN,
4088 unsigned Depth) const {
4089 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4090 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4091 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4092 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4093 "Should use isKnownNeverNaN if you don't know whether Op"
4094 " is a target node!");
4095 return false;
4096}
4097
4099 const APInt &DemandedElts,
4100 APInt &UndefElts,
4101 const SelectionDAG &DAG,
4102 unsigned Depth) const {
4103 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4104 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4105 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4106 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4107 "Should use isSplatValue if you don't know whether Op"
4108 " is a target node!");
4109 return false;
4110}
4111
4112// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4113// work with truncating build vectors and vectors with elements of less than
4114// 8 bits.
4116 if (!N)
4117 return false;
4118
4119 unsigned EltWidth;
4120 APInt CVal;
4121 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4122 /*AllowTruncation=*/true)) {
4123 CVal = CN->getAPIntValue();
4124 EltWidth = N.getValueType().getScalarSizeInBits();
4125 } else
4126 return false;
4127
4128 // If this is a truncating splat, truncate the splat value.
4129 // Otherwise, we may fail to match the expected values below.
4130 if (EltWidth < CVal.getBitWidth())
4131 CVal = CVal.trunc(EltWidth);
4132
4133 switch (getBooleanContents(N.getValueType())) {
4135 return CVal[0];
4137 return CVal.isOne();
4139 return CVal.isAllOnes();
4140 }
4141
4142 llvm_unreachable("Invalid boolean contents");
4143}
4144
4146 if (!N)
4147 return false;
4148
4150 if (!CN) {
4152 if (!BV)
4153 return false;
4154
4155 // Only interested in constant splats, we don't care about undef
4156 // elements in identifying boolean constants and getConstantSplatNode
4157 // returns NULL if all ops are undef;
4158 CN = BV->getConstantSplatNode();
4159 if (!CN)
4160 return false;
4161 }
4162
4163 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4164 return !CN->getAPIntValue()[0];
4165
4166 return CN->isZero();
4167}
4168
4170 bool SExt) const {
4171 if (VT == MVT::i1)
4172 return N->isOne();
4173
4175 switch (Cnt) {
4177 // An extended value of 1 is always true, unless its original type is i1,
4178 // in which case it will be sign extended to -1.
4179 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4182 return N->isAllOnes() && SExt;
4183 }
4184 llvm_unreachable("Unexpected enumeration.");
4185}
4186
4187/// This helper function of SimplifySetCC tries to optimize the comparison when
4188/// either operand of the SetCC node is a bitwise-and instruction.
4189SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4190 ISD::CondCode Cond, const SDLoc &DL,
4191 DAGCombinerInfo &DCI) const {
 // If only N1 is an AND, swap the operands so the AND ends up in N0 and the
 // matching below only has to consider one side.
4192 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4193 std::swap(N0, N1);
4194
4195 SelectionDAG &DAG = DCI.DAG;
4196 EVT OpVT = N0.getValueType();
 // Only integer [in]equality comparisons against an AND are handled here.
4197 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4198 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4199 return SDValue();
4200
4201 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4202 // iff everything but LSB is known zero:
4203 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4206 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4207 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4208 if (DAG.MaskedValueIsZero(N0, UpperBits))
4209 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4210 }
4211
4212 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4213 // test in a narrow type that we can truncate to with no cost. Examples:
4214 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4215 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4216 // TODO: This conservatively checks for type legality on the source and
4217 // destination types. That may inhibit optimizations, but it also
4218 // allows setcc->shift transforms that may be more beneficial.
4219 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4220 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4221 isTypeLegal(OpVT) && N0.hasOneUse()) {
 // The narrow type keeps exactly the active bits of the mask, so the mask
 // bit becomes the sign bit of NarrowVT.
4222 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4223 AndC->getAPIntValue().getActiveBits());
4224 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4225 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4226 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4227 return DAG.getSetCC(DL, VT, Trunc, Zero,
4229 }
4230 }
4231
4232 // Match these patterns in any of their permutations:
4233 // (X & Y) == Y
4234 // (X & Y) != Y
4235 SDValue X, Y;
4236 if (N0.getOperand(0) == N1) {
4237 X = N0.getOperand(1);
4238 Y = N0.getOperand(0);
4239 } else if (N0.getOperand(1) == N1) {
4240 X = N0.getOperand(0);
4241 Y = N0.getOperand(1);
4242 } else {
4243 return SDValue();
4244 }
4245
4246 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4247 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4248 // it's liable to create an infinite loop.
4249 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4250 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4252 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4253 // Note that where Y is variable and is known to have at most one bit set
4254 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4255 // equivalent when Y == 0.
4256 assert(OpVT.isInteger());
4258 if (DCI.isBeforeLegalizeOps() ||
4260 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4261 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4262 // If the target supports an 'and-not' or 'and-complement' logic operation,
4263 // try to use that to make a comparison operation more efficient.
4264 // But don't do this transform if the mask is a single bit because there are
4265 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4266 // 'rlwinm' on PPC).
4267
4268 // Bail out if the compare operand that we want to turn into a zero is
4269 // already a zero (otherwise, infinite loop).
4270 if (isNullConstant(Y))
4271 return SDValue();
4272
4273 // Transform this into: ~X & Y == 0.
4274 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4275 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4276 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4277 }
4278
4279 return SDValue();
4280}
4281
4282/// This helper function of SimplifySetCC tries to optimize the comparison when
4283/// either operand of the SetCC node is a bitwise-or instruction.
4284/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4285SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4286 ISD::CondCode Cond, const SDLoc &DL,
4287 DAGCombinerInfo &DCI) const {
4288 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4289 std::swap(N0, N1);
4290
4291 SelectionDAG &DAG = DCI.DAG;
4292 EVT OpVT = N0.getValueType();
4293 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4294 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4295 return SDValue();
4296
4297 // (X | Y) == Y
4298 // (X | Y) != Y
4299 SDValue X;
4300 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4301 // If the target supports an 'and-not' or 'and-complement' logic operation,
4302 // try to use that to make a comparison operation more efficient.
4303
4304 // Bail out if the compare operand that we want to turn into a zero is
4305 // already a zero (otherwise, infinite loop).
4306 if (isNullConstant(N1))
4307 return SDValue();
4308
4309 // Transform this into: X & ~Y ==/!= 0.
4310 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4311 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4312 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4313 }
4314
4315 return SDValue();
4316}
4317
4318/// There are multiple IR patterns that could be checking whether certain
4319/// truncation of a signed number would be lossy or not. The pattern which is
4320/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4321/// We are looking for the following pattern: (KeptBits is a constant)
4322/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4323/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4324/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4325/// We will unfold it into the natural trunc+sext pattern:
4326/// ((%x << C) a>> C) dstcond %x
4327/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4328SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4329 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4330 const SDLoc &DL) const {
4331 // We must be comparing with a constant.
4332 ConstantSDNode *C1;
4333 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4334 return SDValue();
4335
4336 // N0 should be: add %x, (1 << (KeptBits-1))
4337 if (N0->getOpcode() != ISD::ADD)
4338 return SDValue();
4339
4340 // And we must be 'add'ing a constant.
4341 ConstantSDNode *C01;
4342 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4343 return SDValue();
4344
4345 SDValue X = N0->getOperand(0);
4346 EVT XVT = X.getValueType();
4347
4348 // Validate constants ...
4349
4350 APInt I1 = C1->getAPIntValue();
4351
 // Map the unsigned range check onto an [in]equality test, adjusting the
 // setcc constant so both inclusive and exclusive bounds become the same
 // canonical power-of-two form.
4352 ISD::CondCode NewCond;
4353 if (Cond == ISD::CondCode::SETULT) {
4354 NewCond = ISD::CondCode::SETEQ;
4355 } else if (Cond == ISD::CondCode::SETULE) {
4356 NewCond = ISD::CondCode::SETEQ;
4357 // But need to 'canonicalize' the constant.
4358 I1 += 1;
4359 } else if (Cond == ISD::CondCode::SETUGT) {
4360 NewCond = ISD::CondCode::SETNE;
4361 // But need to 'canonicalize' the constant.
4362 I1 += 1;
4363 } else if (Cond == ISD::CondCode::SETUGE) {
4364 NewCond = ISD::CondCode::SETNE;
4365 } else
4366 return SDValue();
4367
4368 APInt I01 = C01->getAPIntValue();
4369
4370 auto checkConstants = [&I1, &I01]() -> bool {
4371 // Both of them must be power-of-two, and the constant from setcc is bigger.
4372 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4373 };
4374
4375 if (checkConstants()) {
4376 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4377 } else {
4378 // What if we invert constants? (and the target predicate)
4379 I1.negate();
4380 I01.negate();
4381 assert(XVT.isInteger());
4382 NewCond = getSetCCInverse(NewCond, XVT);
4383 if (!checkConstants())
4384 return SDValue();
4385 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4386 }
4387
4388 // They are power-of-two, so which bit is set?
4389 const unsigned KeptBits = I1.logBase2();
4390 const unsigned KeptBitsMinusOne = I01.logBase2();
4391
4392 // Magic!
 // The add constant must be exactly half the setcc constant, i.e.
 // I01 == 1 << (KeptBits-1) and I1 == 1 << KeptBits.
4393 if (KeptBits != (KeptBitsMinusOne + 1))
4394 return SDValue();
4395 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4396
4397 // We don't want to do this in every single case.
4398 SelectionDAG &DAG = DCI.DAG;
4399 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4400 return SDValue();
4401
4402 // Unfold into: sext_inreg(%x) cond %x
4403 // Where 'cond' will be either 'eq' or 'ne'.
4404 SDValue SExtInReg = DAG.getNode(
4406 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)))
4407 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4408}
4409
4410// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4411SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4412 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4413 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4415 "Should be a comparison with 0.");
4416 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4417 "Valid only for [in]equality comparisons.");
4418
4419 unsigned NewShiftOpcode;
4420 SDValue X, C, Y;
4421
4422 SelectionDAG &DAG = DCI.DAG;
4423
4424 // Look for '(C l>>/<< Y)'.
 // On success the lambda fills in C (the shifted constant), Y (the shift
 // amount) and NewShiftOpcode (the opposite logical shift direction).
4425 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4426 // The shift should be one-use.
4427 if (!V.hasOneUse())
4428 return false;
4429 unsigned OldShiftOpcode = V.getOpcode();
4430 switch (OldShiftOpcode) {
4431 case ISD::SHL:
4432 NewShiftOpcode = ISD::SRL;
4433 break;
4434 case ISD::SRL:
4435 NewShiftOpcode = ISD::SHL;
4436 break;
4437 default:
4438 return false; // must be a logical shift.
4439 }
4440 // We should be shifting a constant.
4441 // FIXME: best to use isConstantOrConstantVector().
4442 C = V.getOperand(0);
4443 ConstantSDNode *CC =
4444 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4445 if (!CC)
4446 return false;
4447 Y = V.getOperand(1);
4448
4449 ConstantSDNode *XC =
4450 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4452 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4453 };
4454
4455 // LHS of comparison should be an one-use 'and'.
4456 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4457 return SDValue();
4458
4459 X = N0.getOperand(0);
4460 SDValue Mask = N0.getOperand(1);
4461
4462 // 'and' is commutative!
 // Try the shift in either operand of the AND before giving up.
4463 if (!Match(Mask)) {
4464 std::swap(X, Mask);
4465 if (!Match(Mask))
4466 return SDValue();
4467 }
4468
4469 EVT VT = X.getValueType();
4470
4471 // Produce:
4472 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4473 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4474 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4475 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4476 return T2;
4477}
4478
4479/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4480/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4481/// handle the commuted versions of these patterns.
4482SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4483 ISD::CondCode Cond, const SDLoc &DL,
4484 DAGCombinerInfo &DCI) const {
4485 unsigned BOpcode = N0.getOpcode();
4486 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4487 "Unexpected binop");
4488 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4489
4490 // (X + Y) == X --> Y == 0
4491 // (X - Y) == X --> Y == 0
4492 // (X ^ Y) == X --> Y == 0
4493 SelectionDAG &DAG = DCI.DAG;
4494 EVT OpVT = N0.getValueType();
4495 SDValue X = N0.getOperand(0);
4496 SDValue Y = N0.getOperand(1);
4497 if (X == N1)
4498 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4499
4500 if (Y != N1)
4501 return SDValue();
4502
4503 // (X + Y) == Y --> X == 0
4504 // (X ^ Y) == Y --> X == 0
4505 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4506 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4507
4508 // The shift would not be valid if the operands are boolean (i1).
4509 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4510 return SDValue();
4511
4512 // (X - Y) == Y --> X == Y << 1
4513 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4514 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4515 if (!DCI.isCalledByLegalizer())
4516 DCI.AddToWorklist(YShl1.getNode());
4517 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4518}
4519
4521 SDValue N0, const APInt &C1,
4522 ISD::CondCode Cond, const SDLoc &dl,
4523 SelectionDAG &DAG) {
4524 // Look through truncs that don't change the value of a ctpop.
4525 // FIXME: Add vector support? Need to be careful with setcc result type below.
4526 SDValue CTPOP = N0;
4527 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4529 CTPOP = N0.getOperand(0);
4530
4531 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4532 return SDValue();
4533
4534 EVT CTVT = CTPOP.getValueType();
4535 SDValue CTOp = CTPOP.getOperand(0);
4536
4537 // Expand a power-of-2-or-zero comparison based on ctpop:
4538 // (ctpop x) u< 2 -> (x & x-1) == 0
4539 // (ctpop x) u> 1 -> (x & x-1) != 0
4540 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4541 // Keep the CTPOP if it is a cheap vector op.
4542 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4543 return SDValue();
4544
4545 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4546 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4547 return SDValue();
4548 if (C1 == 0 && (Cond == ISD::SETULT))
4549 return SDValue(); // This is handled elsewhere.
4550
4551 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4552
4553 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4554 SDValue Result = CTOp;
4555 for (unsigned i = 0; i < Passes; i++) {
4556 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4557 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4558 }
4560 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4561 }
4562
4563 // Expand a power-of-2 comparison based on ctpop
4564 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4565 // Keep the CTPOP if it is cheap.
4566 if (TLI.isCtpopFast(CTVT))
4567 return SDValue();
4568
4569 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4570 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4571 assert(CTVT.isInteger());
4572 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4573
4574 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4575 // check before emitting a potentially unnecessary op.
4576 if (DAG.isKnownNeverZero(CTOp)) {
4577 // (ctpop x) == 1 --> (x & x-1) == 0
4578 // (ctpop x) != 1 --> (x & x-1) != 0
4579 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4580 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4581 return RHS;
4582 }
4583
4584 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4585 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4586 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4588 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4589 }
4590
4591 return SDValue();
4592}
4593
4595 ISD::CondCode Cond, const SDLoc &dl,
4596 SelectionDAG &DAG) {
4597 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4598 return SDValue();
4599
4600 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4601 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4602 return SDValue();
4603
4604 auto getRotateSource = [](SDValue X) {
4605 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4606 return X.getOperand(0);
4607 return SDValue();
4608 };
4609
4610 // Peek through a rotated value compared against 0 or -1:
4611 // (rot X, Y) == 0/-1 --> X == 0/-1
4612 // (rot X, Y) != 0/-1 --> X != 0/-1
4613 if (SDValue R = getRotateSource(N0))
4614 return DAG.getSetCC(dl, VT, R, N1, Cond);
4615
4616 // Peek through an 'or' of a rotated value compared against 0:
4617 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4618 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4619 //
4620 // TODO: Add the 'and' with -1 sibling.
4621 // TODO: Recurse through a series of 'or' ops to find the rotate.
4622 EVT OpVT = N0.getValueType();
4623 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4624 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4625 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4626 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4627 }
4628 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4629 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4630 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4631 }
4632 }
4633
4634 return SDValue();
4635}
4636
4638 ISD::CondCode Cond, const SDLoc &dl,
4639 SelectionDAG &DAG) {
4640 // If we are testing for all-bits-clear, we might be able to do that with
4641 // less shifting since bit-order does not matter.
4642 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4643 return SDValue();
4644
4645 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4646 if (!C1 || !C1->isZero())
4647 return SDValue();
4648
4649 if (!N0.hasOneUse() ||
4650 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4651 return SDValue();
4652
4653 unsigned BitWidth = N0.getScalarValueSizeInBits();
4654 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4655 if (!ShAmtC)
4656 return SDValue();
4657
4658 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4659 if (ShAmt == 0)
4660 return SDValue();
4661
4662 // Canonicalize fshr as fshl to reduce pattern-matching.
4663 if (N0.getOpcode() == ISD::FSHR)
4664 ShAmt = BitWidth - ShAmt;
4665
4666 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4667 SDValue X, Y;
4668 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4669 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4670 return false;
4671 if (Or.getOperand(0) == Other) {
4672 X = Or.getOperand(0);
4673 Y = Or.getOperand(1);
4674 return true;
4675 }
4676 if (Or.getOperand(1) == Other) {
4677 X = Or.getOperand(1);
4678 Y = Or.getOperand(0);
4679 return true;
4680 }
4681 return false;
4682 };
4683
4684 EVT OpVT = N0.getValueType();
4685 EVT ShAmtVT = N0.getOperand(2).getValueType();
4686 SDValue F0 = N0.getOperand(0);
4687 SDValue F1 = N0.getOperand(1);
4688 if (matchOr(F0, F1)) {
4689 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4690 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4691 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4692 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4693 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4694 }
4695 if (matchOr(F1, F0)) {
4696 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4697 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4698 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4699 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4700 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4701 }
4702
4703 return SDValue();
4704}
4705
4706/// Try to simplify a setcc built with the specified operands and cc. If it is
4707/// unable to simplify it, return a null SDValue.
4709 ISD::CondCode Cond, bool foldBooleans,
4710 DAGCombinerInfo &DCI,
4711 const SDLoc &dl) const {
4712 SelectionDAG &DAG = DCI.DAG;
4713 const DataLayout &Layout = DAG.getDataLayout();
4714 EVT OpVT = N0.getValueType();
4715 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4716
4717 // Constant fold or commute setcc.
4718 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4719 return Fold;
4720
4721 bool N0ConstOrSplat =
4722 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4723 bool N1ConstOrSplat =
4724 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4725
4726 // Canonicalize toward having the constant on the RHS.
4727 // TODO: Handle non-splat vector constants. All undef causes trouble.
4728 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4729 // infinite loop here when we encounter one.
4731 if (N0ConstOrSplat && !N1ConstOrSplat &&
4732 (DCI.isBeforeLegalizeOps() ||
4733 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4734 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4735
4736 // If we have a subtract with the same 2 non-constant operands as this setcc
4737 // -- but in reverse order -- then try to commute the operands of this setcc
4738 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4739 // instruction on some targets.
4740 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4741 (DCI.isBeforeLegalizeOps() ||
4742 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4743 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4744 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4745 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4746
4747 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4748 return V;
4749
4750 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4751 return V;
4752
4753 if (auto *N1C = isConstOrConstSplat(N1)) {
4754 const APInt &C1 = N1C->getAPIntValue();
4755
4756 // Optimize some CTPOP cases.
4757 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4758 return V;
4759
4760 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4761 // X * Y == 0 --> (X == 0) || (Y == 0)
4762 // X * Y != 0 --> (X != 0) && (Y != 0)
4763 // TODO: This bails out if minsize is set, but if the target doesn't have a
4764 // single instruction multiply for this type, it would likely be
4765 // smaller to decompose.
4766 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4767 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4768 (N0->getFlags().hasNoUnsignedWrap() ||
4769 N0->getFlags().hasNoSignedWrap()) &&
4770 !Attr.hasFnAttr(Attribute::MinSize)) {
4771 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4772 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4773 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4774 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4775 }
4776
4777 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4778 // equality comparison, then we're just comparing whether X itself is
4779 // zero.
4780 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4781 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4783 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4784 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4785 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4786 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4787 // (srl (ctlz x), 5) == 0 -> X != 0
4788 // (srl (ctlz x), 5) != 1 -> X != 0
4789 Cond = ISD::SETNE;
4790 } else {
4791 // (srl (ctlz x), 5) != 0 -> X == 0
4792 // (srl (ctlz x), 5) == 1 -> X == 0
4793 Cond = ISD::SETEQ;
4794 }
4795 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4796 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4797 Cond);
4798 }
4799 }
4800 }
4801 }
4802
4803 // setcc X, 0, setlt --> X (when X is all sign bits)
4804 // setcc X, 0, setne --> X (when X is all sign bits)
4805 //
4806 // When we know that X has 0 or -1 in each element (or scalar), this
4807 // comparison will produce X. This is only true when boolean contents are
4808 // represented via 0s and -1s.
4809 if (VT == OpVT &&
4810 // Check that the result of setcc is 0 and -1.
4812 // Match only for checks X < 0 and X != 0
4813 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(N1) &&
4814 // The identity holds iff we know all sign bits for all lanes.
4816 return N0;
4817
4818 // FIXME: Support vectors.
4819 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4820 const APInt &C1 = N1C->getAPIntValue();
4821
4822 // (zext x) == C --> x == (trunc C)
4823 // (sext x) == C --> x == (trunc C)
4824 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4825 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4826 unsigned MinBits = N0.getValueSizeInBits();
4827 SDValue PreExt;
4828 bool Signed = false;
4829 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4830 // ZExt
4831 MinBits = N0->getOperand(0).getValueSizeInBits();
4832 PreExt = N0->getOperand(0);
4833 } else if (N0->getOpcode() == ISD::AND) {
4834 // DAGCombine turns costly ZExts into ANDs
4835 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4836 if ((C->getAPIntValue()+1).isPowerOf2()) {
4837 MinBits = C->getAPIntValue().countr_one();
4838 PreExt = N0->getOperand(0);
4839 }
4840 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4841 // SExt
4842 MinBits = N0->getOperand(0).getValueSizeInBits();
4843 PreExt = N0->getOperand(0);
4844 Signed = true;
4845 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4846 // ZEXTLOAD / SEXTLOAD
4847 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4848 MinBits = LN0->getMemoryVT().getSizeInBits();
4849 PreExt = N0;
4850 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4851 Signed = true;
4852 MinBits = LN0->getMemoryVT().getSizeInBits();
4853 PreExt = N0;
4854 }
4855 }
4856
4857 // Figure out how many bits we need to preserve this constant.
4858 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4859
4860 // Make sure we're not losing bits from the constant.
4861 if (MinBits > 0 &&
4862 MinBits < C1.getBitWidth() &&
4863 MinBits >= ReqdBits) {
4864 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4865 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4866 // Will get folded away.
4867 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4868 if (MinBits == 1 && C1 == 1)
4869 // Invert the condition.
4870 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4872 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4873 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4874 }
4875
4876 // If truncating the setcc operands is not desirable, we can still
4877 // simplify the expression in some cases:
4878 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4879 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4880 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4881 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4882 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4883 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4884 SDValue TopSetCC = N0->getOperand(0);
4885 unsigned N0Opc = N0->getOpcode();
4886 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4887 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4888 TopSetCC.getOpcode() == ISD::SETCC &&
4889 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4890 (isConstFalseVal(N1) ||
4891 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4892
4893 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4894 (!N1C->isZero() && Cond == ISD::SETNE);
4895
4896 if (!Inverse)
4897 return TopSetCC;
4898
4900 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4901 TopSetCC.getOperand(0).getValueType());
4902 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4903 TopSetCC.getOperand(1),
4904 InvCond);
4905 }
4906 }
4907 }
4908
4909 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4910 // equality or unsigned, and all 1 bits of the const are in the same
4911 // partial word, see if we can shorten the load.
4912 if (DCI.isBeforeLegalize() &&
4914 N0.getOpcode() == ISD::AND && C1 == 0 &&
4915 N0.getNode()->hasOneUse() &&
4916 isa<LoadSDNode>(N0.getOperand(0)) &&
4917 N0.getOperand(0).getNode()->hasOneUse() &&
4919 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4920 APInt bestMask;
4921 unsigned bestWidth = 0, bestOffset = 0;
4922 if (Lod->isSimple() && Lod->isUnindexed() &&
4923 (Lod->getMemoryVT().isByteSized() ||
4924 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4925 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4926 unsigned origWidth = N0.getValueSizeInBits();
4927 unsigned maskWidth = origWidth;
4928 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4929 // 8 bits, but have to be careful...
4930 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4931 origWidth = Lod->getMemoryVT().getSizeInBits();
4932 const APInt &Mask = N0.getConstantOperandAPInt(1);
4933 // Only consider power-of-2 widths (and at least one byte) as candiates
4934 // for the narrowed load.
4935 for (unsigned width = 8; width < origWidth; width *= 2) {
4936 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4937 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4938 // Avoid accessing any padding here for now (we could use memWidth
4939 // instead of origWidth here otherwise).
4940 unsigned maxOffset = origWidth - width;
4941 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4942 if (Mask.isSubsetOf(newMask)) {
4943 unsigned ptrOffset =
4944 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4945 unsigned IsFast = 0;
4946 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4947 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4949 ptrOffset / 8) &&
4951 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4952 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4953 IsFast) {
4954 bestOffset = ptrOffset / 8;
4955 bestMask = Mask.lshr(offset);
4956 bestWidth = width;
4957 break;
4958 }
4959 }
4960 newMask <<= 8;
4961 }
4962 if (bestWidth)
4963 break;
4964 }
4965 }
4966 if (bestWidth) {
4967 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4968 SDValue Ptr = Lod->getBasePtr();
4969 if (bestOffset != 0)
4970 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4971 SDValue NewLoad =
4972 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4973 Lod->getPointerInfo().getWithOffset(bestOffset),
4974 Lod->getBaseAlign());
4975 SDValue And =
4976 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4977 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4978 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4979 }
4980 }
4981
4982 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4983 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4984 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4985
4986 // If the comparison constant has bits in the upper part, the
4987 // zero-extended value could never match.
4989 C1.getBitWidth() - InSize))) {
4990 switch (Cond) {
4991 case ISD::SETUGT:
4992 case ISD::SETUGE:
4993 case ISD::SETEQ:
4994 return DAG.getConstant(0, dl, VT);
4995 case ISD::SETULT:
4996 case ISD::SETULE:
4997 case ISD::SETNE:
4998 return DAG.getConstant(1, dl, VT);
4999 case ISD::SETGT:
5000 case ISD::SETGE:
5001 // True if the sign bit of C1 is set.
5002 return DAG.getConstant(C1.isNegative(), dl, VT);
5003 case ISD::SETLT:
5004 case ISD::SETLE:
5005 // True if the sign bit of C1 isn't set.
5006 return DAG.getConstant(C1.isNonNegative(), dl, VT);
5007 default:
5008 break;
5009 }
5010 }
5011
5012 // Otherwise, we can perform the comparison with the low bits.
5013 switch (Cond) {
5014 case ISD::SETEQ:
5015 case ISD::SETNE:
5016 case ISD::SETUGT:
5017 case ISD::SETUGE:
5018 case ISD::SETULT:
5019 case ISD::SETULE: {
5020 EVT newVT = N0.getOperand(0).getValueType();
5021 // FIXME: Should use isNarrowingProfitable.
5022 if (DCI.isBeforeLegalizeOps() ||
5023 (isOperationLegal(ISD::SETCC, newVT) &&
5024 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
5026 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
5027 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
5028
5029 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
5030 NewConst, Cond);
5031 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
5032 }
5033 break;
5034 }
5035 default:
5036 break; // todo, be more careful with signed comparisons
5037 }
5038 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5039 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5041 OpVT)) {
5042 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
5043 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5044 EVT ExtDstTy = N0.getValueType();
5045 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5046
5047 // If the constant doesn't fit into the number of bits for the source of
5048 // the sign extension, it is impossible for both sides to be equal.
5049 if (C1.getSignificantBits() > ExtSrcTyBits)
5050 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
5051
5052 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5053 ExtDstTy != ExtSrcTy && "Unexpected types!");
5054 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
5055 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
5056 DAG.getConstant(Imm, dl, ExtDstTy));
5057 if (!DCI.isCalledByLegalizer())
5058 DCI.AddToWorklist(ZextOp.getNode());
5059 // Otherwise, make this a use of a zext.
5060 return DAG.getSetCC(dl, VT, ZextOp,
5061 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
5062 } else if ((N1C->isZero() || N1C->isOne()) &&
5063 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5064 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5065 // excluded as they are handled below whilst checking for foldBooleans.
5066 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5067 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5068 (N0.getValueType() == MVT::i1 ||
5072 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5073 if (TrueWhenTrue)
5074 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5075 // Invert the condition.
5076 if (N0.getOpcode() == ISD::SETCC) {
5079 if (DCI.isBeforeLegalizeOps() ||
5081 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5082 }
5083 }
5084
5085 if ((N0.getOpcode() == ISD::XOR ||
5086 (N0.getOpcode() == ISD::AND &&
5087 N0.getOperand(0).getOpcode() == ISD::XOR &&
5088 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5089 isOneConstant(N0.getOperand(1))) {
5090 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5091 // can only do this if the top bits are known zero.
5092 unsigned BitWidth = N0.getValueSizeInBits();
5093 if (DAG.MaskedValueIsZero(N0,
5095 BitWidth-1))) {
5096 // Okay, get the un-inverted input value.
5097 SDValue Val;
5098 if (N0.getOpcode() == ISD::XOR) {
5099 Val = N0.getOperand(0);
5100 } else {
5101 assert(N0.getOpcode() == ISD::AND &&
5102 N0.getOperand(0).getOpcode() == ISD::XOR);
5103 // ((X^1)&1)^1 -> X & 1
5104 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5105 N0.getOperand(0).getOperand(0),
5106 N0.getOperand(1));
5107 }
5108
5109 return DAG.getSetCC(dl, VT, Val, N1,
5111 }
5112 } else if (N1C->isOne()) {
5113 SDValue Op0 = N0;
5114 if (Op0.getOpcode() == ISD::TRUNCATE)
5115 Op0 = Op0.getOperand(0);
5116
5117 if ((Op0.getOpcode() == ISD::XOR) &&
5118 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5119 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5120 SDValue XorLHS = Op0.getOperand(0);
5121 SDValue XorRHS = Op0.getOperand(1);
5122 // Ensure that the input setccs return an i1 type or 0/1 value.
5123 if (Op0.getValueType() == MVT::i1 ||
5128 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5130 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5131 }
5132 }
5133 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5134 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5135 if (Op0.getValueType().bitsGT(VT))
5136 Op0 = DAG.getNode(ISD::AND, dl, VT,
5137 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5138 DAG.getConstant(1, dl, VT));
5139 else if (Op0.getValueType().bitsLT(VT))
5140 Op0 = DAG.getNode(ISD::AND, dl, VT,
5141 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5142 DAG.getConstant(1, dl, VT));
5143
5144 return DAG.getSetCC(dl, VT, Op0,
5145 DAG.getConstant(0, dl, Op0.getValueType()),
5147 }
5148 if (Op0.getOpcode() == ISD::AssertZext &&
5149 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5150 return DAG.getSetCC(dl, VT, Op0,
5151 DAG.getConstant(0, dl, Op0.getValueType()),
5153 }
5154 }
5155
5156 // Given:
5157 // icmp eq/ne (urem %x, %y), 0
5158 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5159 // icmp eq/ne %x, 0
5160 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5161 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5162 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5163 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5164 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5165 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5166 }
5167
5168 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5169 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5170 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5172 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5173 N1C->isAllOnes()) {
5174 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5175 DAG.getConstant(0, dl, OpVT),
5177 }
5178
5179 // fold (setcc (trunc x) c) -> (setcc x c)
5180 if (N0.getOpcode() == ISD::TRUNCATE &&
5182 (N0->getFlags().hasNoSignedWrap() &&
5185 EVT NewVT = N0.getOperand(0).getValueType();
5186 SDValue NewConst = DAG.getConstant(
5188 ? C1.sext(NewVT.getSizeInBits())
5189 : C1.zext(NewVT.getSizeInBits()),
5190 dl, NewVT);
5191 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5192 }
5193
5194 if (SDValue V =
5195 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5196 return V;
5197 }
5198
5199 // These simplifications apply to splat vectors as well.
5200 // TODO: Handle more splat vector cases.
5201 if (auto *N1C = isConstOrConstSplat(N1)) {
5202 const APInt &C1 = N1C->getAPIntValue();
5203
5204 APInt MinVal, MaxVal;
5205 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5207 MinVal = APInt::getSignedMinValue(OperandBitSize);
5208 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5209 } else {
5210 MinVal = APInt::getMinValue(OperandBitSize);
5211 MaxVal = APInt::getMaxValue(OperandBitSize);
5212 }
5213
5214 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5215 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5216 // X >= MIN --> true
5217 if (C1 == MinVal)
5218 return DAG.getBoolConstant(true, dl, VT, OpVT);
5219
5220 if (!VT.isVector()) { // TODO: Support this for vectors.
5221 // X >= C0 --> X > (C0 - 1)
5222 APInt C = C1 - 1;
5224 if ((DCI.isBeforeLegalizeOps() ||
5225 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5226 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5227 isLegalICmpImmediate(C.getSExtValue())))) {
5228 return DAG.getSetCC(dl, VT, N0,
5229 DAG.getConstant(C, dl, N1.getValueType()),
5230 NewCC);
5231 }
5232 }
5233 }
5234
5235 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5236 // X <= MAX --> true
5237 if (C1 == MaxVal)
5238 return DAG.getBoolConstant(true, dl, VT, OpVT);
5239
5240 // X <= C0 --> X < (C0 + 1)
5241 if (!VT.isVector()) { // TODO: Support this for vectors.
5242 APInt C = C1 + 1;
5244 if ((DCI.isBeforeLegalizeOps() ||
5245 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5246 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5247 isLegalICmpImmediate(C.getSExtValue())))) {
5248 return DAG.getSetCC(dl, VT, N0,
5249 DAG.getConstant(C, dl, N1.getValueType()),
5250 NewCC);
5251 }
5252 }
5253 }
5254
5255 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5256 if (C1 == MinVal)
5257 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5258
5259 // TODO: Support this for vectors after legalize ops.
5260 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5261 // Canonicalize setlt X, Max --> setne X, Max
5262 if (C1 == MaxVal)
5263 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5264
5265 // If we have setult X, 1, turn it into seteq X, 0
5266 if (C1 == MinVal+1)
5267 return DAG.getSetCC(dl, VT, N0,
5268 DAG.getConstant(MinVal, dl, N0.getValueType()),
5269 ISD::SETEQ);
5270 }
5271 }
5272
5273 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5274 if (C1 == MaxVal)
5275 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5276
5277 // TODO: Support this for vectors after legalize ops.
5278 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5279 // Canonicalize setgt X, Min --> setne X, Min
5280 if (C1 == MinVal)
5281 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5282
5283 // If we have setugt X, Max-1, turn it into seteq X, Max
5284 if (C1 == MaxVal-1)
5285 return DAG.getSetCC(dl, VT, N0,
5286 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5287 ISD::SETEQ);
5288 }
5289 }
5290
5291 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5292 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5293 if (C1.isZero())
5294 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5295 VT, N0, N1, Cond, DCI, dl))
5296 return CC;
5297
5298 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5299 // For example, when high 32-bits of i64 X are known clear:
5300 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5301 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5302 bool CmpZero = N1C->isZero();
5303 bool CmpNegOne = N1C->isAllOnes();
5304 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5305 // Match or(lo,shl(hi,bw/2)) pattern.
5306 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5307 unsigned EltBits = V.getScalarValueSizeInBits();
5308 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5309 return false;
5310 SDValue LHS = V.getOperand(0);
5311 SDValue RHS = V.getOperand(1);
5312 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5313 // Unshifted element must have zero upperbits.
5314 if (RHS.getOpcode() == ISD::SHL &&
5315 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5316 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5317 DAG.MaskedValueIsZero(LHS, HiBits)) {
5318 Lo = LHS;
5319 Hi = RHS.getOperand(0);
5320 return true;
5321 }
5322 if (LHS.getOpcode() == ISD::SHL &&
5323 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5324 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5325 DAG.MaskedValueIsZero(RHS, HiBits)) {
5326 Lo = RHS;
5327 Hi = LHS.getOperand(0);
5328 return true;
5329 }
5330 return false;
5331 };
5332
5333 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5334 unsigned EltBits = N0.getScalarValueSizeInBits();
5335 unsigned HalfBits = EltBits / 2;
5336 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5337 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5338 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5339 SDValue NewN0 =
5340 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5341 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5342 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5343 };
5344
5345 SDValue Lo, Hi;
5346 if (IsConcat(N0, Lo, Hi))
5347 return MergeConcat(Lo, Hi);
5348
5349 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5350 SDValue Lo0, Lo1, Hi0, Hi1;
5351 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5352 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5353 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5354 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5355 }
5356 }
5357 }
5358 }
5359
5360 // If we have "setcc X, C0", check to see if we can shrink the immediate
5361 // by changing cc.
5362 // TODO: Support this for vectors after legalize ops.
5363 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5364 // SETUGT X, SINTMAX -> SETLT X, 0
5365 // SETUGE X, SINTMIN -> SETLT X, 0
5366 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5367 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5368 return DAG.getSetCC(dl, VT, N0,
5369 DAG.getConstant(0, dl, N1.getValueType()),
5370 ISD::SETLT);
5371
5372 // SETULT X, SINTMIN -> SETGT X, -1
5373 // SETULE X, SINTMAX -> SETGT X, -1
5374 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5375 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5376 return DAG.getSetCC(dl, VT, N0,
5377 DAG.getAllOnesConstant(dl, N1.getValueType()),
5378 ISD::SETGT);
5379 }
5380 }
5381
5382 // Back to non-vector simplifications.
5383 // TODO: Can we do these for vector splats?
5384 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5385 const APInt &C1 = N1C->getAPIntValue();
5386 EVT ShValTy = N0.getValueType();
5387
5388 // Fold bit comparisons when we can. This will result in an
5389 // incorrect value when boolean false is negative one, unless
5390 // the bitsize is 1 in which case the false value is the same
5391 // in practice regardless of the representation.
5392 if ((VT.getSizeInBits() == 1 ||
5394 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5395 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5396 N0.getOpcode() == ISD::AND) {
5397 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5398 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5399 // Perform the xform if the AND RHS is a single bit.
5400 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5401 if (AndRHS->getAPIntValue().isPowerOf2() &&
5402 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5403 return DAG.getNode(
5404 ISD::TRUNCATE, dl, VT,
5405 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5406 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5407 }
5408 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5409 // (X & 8) == 8 --> (X & 8) >> 3
5410 // Perform the xform if C1 is a single bit.
5411 unsigned ShCt = C1.logBase2();
5412 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5413 return DAG.getNode(
5414 ISD::TRUNCATE, dl, VT,
5415 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5416 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5417 }
5418 }
5419 }
5420 }
5421
5422 if (C1.getSignificantBits() <= 64 &&
5424 // (X & -256) == 256 -> (X >> 8) == 1
5425 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5426 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5427 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5428 const APInt &AndRHSC = AndRHS->getAPIntValue();
5429 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5430 unsigned ShiftBits = AndRHSC.countr_zero();
5431 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5432 // If using an unsigned shift doesn't yield a legal compare
5433 // immediate, try using sra instead.
5434 APInt NewC = C1.lshr(ShiftBits);
5435 if (NewC.getSignificantBits() <= 64 &&
5437 APInt SignedC = C1.ashr(ShiftBits);
5438 if (SignedC.getSignificantBits() <= 64 &&
5440 SDValue Shift = DAG.getNode(
5441 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5442 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5443 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5444 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5445 }
5446 }
5447 SDValue Shift = DAG.getNode(
5448 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5449 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5450 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5451 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5452 }
5453 }
5454 }
5455 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5456 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5457 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5458 // X < 0x100000000 -> (X >> 32) < 1
5459 // X >= 0x100000000 -> (X >> 32) >= 1
5460 // X <= 0x0ffffffff -> (X >> 32) < 1
5461 // X > 0x0ffffffff -> (X >> 32) >= 1
5462 unsigned ShiftBits;
5463 APInt NewC = C1;
5464 ISD::CondCode NewCond = Cond;
5465 if (AdjOne) {
5466 ShiftBits = C1.countr_one();
5467 NewC = NewC + 1;
5468 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5469 } else {
5470 ShiftBits = C1.countr_zero();
5471 }
5472 NewC.lshrInPlace(ShiftBits);
5473 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5475 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5476 SDValue Shift =
5477 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5478 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5479 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5480 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5481 }
5482 }
5483 }
5484 }
5485
5487 auto *CFP = cast<ConstantFPSDNode>(N1);
5488 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5489
5490 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5491 // constant if knowing that the operand is non-nan is enough. We prefer to
5492 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5493 // materialize 0.0.
5494 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5495 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5496
5497 // setcc (fneg x), C -> setcc swap(pred) x, -C
5498 if (N0.getOpcode() == ISD::FNEG) {
5500 if (DCI.isBeforeLegalizeOps() ||
5501 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5502 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5503 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5504 }
5505 }
5506
5507 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5509 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5510 bool IsFabs = N0.getOpcode() == ISD::FABS;
5511 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5512 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5513 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5514 : (IsFabs ? fcInf : fcPosInf);
5515 if (Cond == ISD::SETUEQ)
5516 Flag |= fcNan;
5517 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5518 DAG.getTargetConstant(Flag, dl, MVT::i32));
5519 }
5520 }
5521
5522 // If the condition is not legal, see if we can find an equivalent one
5523 // which is legal.
5525 // If the comparison was an awkward floating-point == or != and one of
5526 // the comparison operands is infinity or negative infinity, convert the
5527 // condition to a less-awkward <= or >=.
5528 if (CFP->getValueAPF().isInfinity()) {
5529 bool IsNegInf = CFP->getValueAPF().isNegative();
5531 switch (Cond) {
5532 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5533 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5534 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5535 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5536 default: break;
5537 }
5538 if (NewCond != ISD::SETCC_INVALID &&
5539 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5540 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5541 }
5542 }
5543 }
5544
5545 if (N0 == N1) {
5546 // The sext(setcc()) => setcc() optimization relies on the appropriate
5547 // constant being emitted.
5548 assert(!N0.getValueType().isInteger() &&
5549 "Integer types should be handled by FoldSetCC");
5550
5551 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5552 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5553 if (UOF == 2) // FP operators that are undefined on NaNs.
5554 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5555 if (UOF == unsigned(EqTrue))
5556 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5557 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5558 // if it is not already.
5559 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5560 if (NewCond != Cond &&
5561 (DCI.isBeforeLegalizeOps() ||
5562 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5563 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5564 }
5565
5566 // ~X > ~Y --> Y > X
5567 // ~X < ~Y --> Y < X
5568 // ~X < C --> X > ~C
5569 // ~X > C --> X < ~C
5570 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5571 N0.getValueType().isInteger()) {
5572 if (isBitwiseNot(N0)) {
5573 if (isBitwiseNot(N1))
5574 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5575
5578 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5579 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5580 }
5581 }
5582 }
5583
5584 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5585 N0.getValueType().isInteger()) {
5586 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5587 N0.getOpcode() == ISD::XOR) {
5588 // Simplify (X+Y) == (X+Z) --> Y == Z
5589 if (N0.getOpcode() == N1.getOpcode()) {
5590 if (N0.getOperand(0) == N1.getOperand(0))
5591 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5592 if (N0.getOperand(1) == N1.getOperand(1))
5593 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5594 if (isCommutativeBinOp(N0.getOpcode())) {
5595 // If X op Y == Y op X, try other combinations.
5596 if (N0.getOperand(0) == N1.getOperand(1))
5597 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5598 Cond);
5599 if (N0.getOperand(1) == N1.getOperand(0))
5600 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5601 Cond);
5602 }
5603 }
5604
5605 // If RHS is a legal immediate value for a compare instruction, we need
5606 // to be careful about increasing register pressure needlessly.
5607 bool LegalRHSImm = false;
5608
5609 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5610 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5611 // Turn (X+C1) == C2 --> X == C2-C1
5612 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5613 return DAG.getSetCC(
5614 dl, VT, N0.getOperand(0),
5615 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5616 dl, N0.getValueType()),
5617 Cond);
5618
5619 // Turn (X^C1) == C2 --> X == C1^C2
5620 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5621 return DAG.getSetCC(
5622 dl, VT, N0.getOperand(0),
5623 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5624 dl, N0.getValueType()),
5625 Cond);
5626 }
5627
5628 // Turn (C1-X) == C2 --> X == C1-C2
5629 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5630 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5631 return DAG.getSetCC(
5632 dl, VT, N0.getOperand(1),
5633 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5634 dl, N0.getValueType()),
5635 Cond);
5636
5637 // Could RHSC fold directly into a compare?
5638 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5639 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5640 }
5641
5642 // (X+Y) == X --> Y == 0 and similar folds.
5643 // Don't do this if X is an immediate that can fold into a cmp
5644 // instruction and X+Y has other uses. It could be an induction variable
5645 // chain, and the transform would increase register pressure.
5646 if (!LegalRHSImm || N0.hasOneUse())
5647 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5648 return V;
5649 }
5650
5651 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5652 N1.getOpcode() == ISD::XOR)
5653 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5654 return V;
5655
5656 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5657 return V;
5658
5659 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5660 return V;
5661 }
5662
5663 // Fold remainder of division by a constant.
5664 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5665 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5666 // When division is cheap or optimizing for minimum size,
5667 // fall through to DIVREM creation by skipping this fold.
5668 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5669 if (N0.getOpcode() == ISD::UREM) {
5670 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5671 return Folded;
5672 } else if (N0.getOpcode() == ISD::SREM) {
5673 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5674 return Folded;
5675 }
5676 }
5677 }
5678
5679 // Fold away ALL boolean setcc's.
5680 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5681 SDValue Temp;
5682 switch (Cond) {
5683 default: llvm_unreachable("Unknown integer setcc!");
5684 case ISD::SETEQ: // X == Y -> ~(X^Y)
5685 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5686 N0 = DAG.getNOT(dl, Temp, OpVT);
5687 if (!DCI.isCalledByLegalizer())
5688 DCI.AddToWorklist(Temp.getNode());
5689 break;
5690 case ISD::SETNE: // X != Y --> (X^Y)
5691 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5692 break;
5693 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5694 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5695 Temp = DAG.getNOT(dl, N0, OpVT);
5696 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5697 if (!DCI.isCalledByLegalizer())
5698 DCI.AddToWorklist(Temp.getNode());
5699 break;
5700 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5701 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5702 Temp = DAG.getNOT(dl, N1, OpVT);
5703 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5704 if (!DCI.isCalledByLegalizer())
5705 DCI.AddToWorklist(Temp.getNode());
5706 break;
5707 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5708 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5709 Temp = DAG.getNOT(dl, N0, OpVT);
5710 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5711 if (!DCI.isCalledByLegalizer())
5712 DCI.AddToWorklist(Temp.getNode());
5713 break;
5714 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5715 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5716 Temp = DAG.getNOT(dl, N1, OpVT);
5717 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5718 break;
5719 }
5720 if (VT.getScalarType() != MVT::i1) {
5721 if (!DCI.isCalledByLegalizer())
5722 DCI.AddToWorklist(N0.getNode());
5723 // FIXME: If running after legalize, we probably can't do this.
5725 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5726 }
5727 return N0;
5728 }
5729
5730 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5731 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5732 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5734 N1->getFlags().hasNoUnsignedWrap()) ||
5736 N1->getFlags().hasNoSignedWrap())) &&
5738 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5739 }
5740
5741 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5742 // TODO: Remove that .isVector() check
5743 if (VT.isVector() && isZeroOrZeroSplat(N1) && N0.getOpcode() == ISD::SUB &&
5745 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), Cond);
5746 }
5747
5748 // Could not fold it.
5749 return SDValue();
5750}
5751
5752/// Returns true (and the GlobalValue and the offset) if the node is a
5753/// GlobalAddress + offset.
5755 int64_t &Offset) const {
5756
5757 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5758
5759 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5760 GA = GASD->getGlobal();
5761 Offset += GASD->getOffset();
5762 return true;
5763 }
5764
5765 if (N->isAnyAdd()) {
5766 SDValue N1 = N->getOperand(0);
5767 SDValue N2 = N->getOperand(1);
5768 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5769 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5770 Offset += V->getSExtValue();
5771 return true;
5772 }
5773 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5774 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5775 Offset += V->getSExtValue();
5776 return true;
5777 }
5778 }
5779 }
5780
5781 return false;
5782}
5783
5785 DAGCombinerInfo &DCI) const {
5786 // Default implementation: no optimization.
5787 return SDValue();
5788}
5789
5790//===----------------------------------------------------------------------===//
5791// Inline Assembler Implementation Methods
5792//===----------------------------------------------------------------------===//
5793
5796 unsigned S = Constraint.size();
5797
5798 if (S == 1) {
5799 switch (Constraint[0]) {
5800 default: break;
5801 case 'r':
5802 return C_RegisterClass;
5803 case 'm': // memory
5804 case 'o': // offsetable
5805 case 'V': // not offsetable
5806 return C_Memory;
5807 case 'p': // Address.
5808 return C_Address;
5809 case 'n': // Simple Integer
5810 case 'E': // Floating Point Constant
5811 case 'F': // Floating Point Constant
5812 return C_Immediate;
5813 case 'i': // Simple Integer or Relocatable Constant
5814 case 's': // Relocatable Constant
5815 case 'X': // Allow ANY value.
5816 case 'I': // Target registers.
5817 case 'J':
5818 case 'K':
5819 case 'L':
5820 case 'M':
5821 case 'N':
5822 case 'O':
5823 case 'P':
5824 case '<':
5825 case '>':
5826 return C_Other;
5827 }
5828 }
5829
5830 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5831 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5832 return C_Memory;
5833 return C_Register;
5834 }
5835 return C_Unknown;
5836}
5837
5838/// Try to replace an X constraint, which matches anything, with another that
5839/// has more specific requirements based on the type of the corresponding
5840/// operand.
5841const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5842 if (ConstraintVT.isInteger())
5843 return "r";
5844 if (ConstraintVT.isFloatingPoint())
5845 return "f"; // works for many targets
5846 return nullptr;
5847}
5848
5850 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5851 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5852 return SDValue();
5853}
5854
5855/// Lower the specified operand into the Ops vector.
5856/// If it is invalid, don't add anything to Ops.
5858 StringRef Constraint,
5859 std::vector<SDValue> &Ops,
5860 SelectionDAG &DAG) const {
5861
5862 if (Constraint.size() > 1)
5863 return;
5864
5865 char ConstraintLetter = Constraint[0];
5866 switch (ConstraintLetter) {
5867 default: break;
5868 case 'X': // Allows any operand
5869 case 'i': // Simple Integer or Relocatable Constant
5870 case 'n': // Simple Integer
5871 case 's': { // Relocatable Constant
5872
5874 uint64_t Offset = 0;
5875
5876 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5877 // etc., since getelementpointer is variadic. We can't use
5878 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5879 // while in this case the GA may be furthest from the root node which is
5880 // likely an ISD::ADD.
5881 while (true) {
5882 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5883 // gcc prints these as sign extended. Sign extend value to 64 bits
5884 // now; without this it would get ZExt'd later in
5885 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5886 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5887 BooleanContent BCont = getBooleanContents(MVT::i64);
5888 ISD::NodeType ExtOpc =
5889 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5890 int64_t ExtVal =
5891 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5892 Ops.push_back(
5893 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5894 return;
5895 }
5896 if (ConstraintLetter != 'n') {
5897 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5898 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5899 GA->getValueType(0),
5900 Offset + GA->getOffset()));
5901 return;
5902 }
5903 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5904 Ops.push_back(DAG.getTargetBlockAddress(
5905 BA->getBlockAddress(), BA->getValueType(0),
5906 Offset + BA->getOffset(), BA->getTargetFlags()));
5907 return;
5908 }
5910 Ops.push_back(Op);
5911 return;
5912 }
5913 }
5914 const unsigned OpCode = Op.getOpcode();
5915 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5916 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5917 Op = Op.getOperand(1);
5918 // Subtraction is not commutative.
5919 else if (OpCode == ISD::ADD &&
5920 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5921 Op = Op.getOperand(0);
5922 else
5923 return;
5924 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5925 continue;
5926 }
5927 return;
5928 }
5929 break;
5930 }
5931 }
5932}
5933
5937
5938std::pair<unsigned, const TargetRegisterClass *>
5940 StringRef Constraint,
5941 MVT VT) const {
5942 if (!Constraint.starts_with("{"))
5943 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5944 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5945
5946 // Remove the braces from around the name.
5947 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5948
5949 std::pair<unsigned, const TargetRegisterClass *> R =
5950 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5951
5952 // Figure out which register class contains this reg.
5953 for (const TargetRegisterClass *RC : RI->regclasses()) {
5954 // If none of the value types for this register class are valid, we
5955 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5956 if (!isLegalRC(*RI, *RC))
5957 continue;
5958
5959 for (const MCPhysReg &PR : *RC) {
5960 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5961 std::pair<unsigned, const TargetRegisterClass *> S =
5962 std::make_pair(PR, RC);
5963
5964 // If this register class has the requested value type, return it,
5965 // otherwise keep searching and return the first class found
5966 // if no other is found which explicitly has the requested type.
5967 if (RI->isTypeLegalForClass(*RC, VT))
5968 return S;
5969 if (!R.second)
5970 R = S;
5971 }
5972 }
5973 }
5974
5975 return R;
5976}
5977
5978//===----------------------------------------------------------------------===//
5979// Constraint Selection.
5980
5981/// Return true of this is an input operand that is a matching constraint like
5982/// "4".
5984 assert(!ConstraintCode.empty() && "No known constraint!");
5985 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5986}
5987
5988/// If this is an input matching constraint, this method returns the output
5989/// operand it matches.
5991 assert(!ConstraintCode.empty() && "No known constraint!");
5992 return atoi(ConstraintCode.c_str());
5993}
5994
5995/// Split up the constraint string from the inline assembly value into the
5996/// specific constraints and their prefixes, and also tie in the associated
5997/// operand values.
5998/// If this returns an empty vector, and if the constraint string itself
5999/// isn't empty, there was an error parsing.
6002 const TargetRegisterInfo *TRI,
6003 const CallBase &Call) const {
6004 /// Information about all of the constraints.
6005 AsmOperandInfoVector ConstraintOperands;
6006 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
6007 unsigned maCount = 0; // Largest number of multiple alternative constraints.
6008
6009 // Do a prepass over the constraints, canonicalizing them, and building up the
6010 // ConstraintOperands list.
6011 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
6012 unsigned ResNo = 0; // ResNo - The result number of the next output.
6013 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
6014
6015 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
6016 ConstraintOperands.emplace_back(std::move(CI));
6017 AsmOperandInfo &OpInfo = ConstraintOperands.back();
6018
6019 // Update multiple alternative constraint count.
6020 if (OpInfo.multipleAlternatives.size() > maCount)
6021 maCount = OpInfo.multipleAlternatives.size();
6022
6023 OpInfo.ConstraintVT = MVT::Other;
6024
6025 // Compute the value type for each operand.
6026 switch (OpInfo.Type) {
6027 case InlineAsm::isOutput: {
6028 // Indirect outputs just consume an argument.
6029 if (OpInfo.isIndirect) {
6030 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6031 break;
6032 }
6033
6034 // The return value of the call is this value. As such, there is no
6035 // corresponding argument.
6036 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
6037 EVT VT;
6038 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
6039 VT = getAsmOperandValueType(DL, STy->getElementType(ResNo));
6040 } else {
6041 assert(ResNo == 0 && "Asm only has one result!");
6042 VT = getAsmOperandValueType(DL, Call.getType());
6043 }
6044 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6045 ++ResNo;
6046 break;
6047 }
6048 case InlineAsm::isInput:
6049 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6050 break;
6051 case InlineAsm::isLabel:
6052 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
6053 ++LabelNo;
6054 continue;
6056 // Nothing to do.
6057 break;
6058 }
6059
6060 if (OpInfo.CallOperandVal) {
6061 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
6062 if (OpInfo.isIndirect) {
6063 OpTy = Call.getParamElementType(ArgNo);
6064 assert(OpTy && "Indirect operand must have elementtype attribute");
6065 }
6066
6067 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6068 if (StructType *STy = dyn_cast<StructType>(OpTy))
6069 if (STy->getNumElements() == 1)
6070 OpTy = STy->getElementType(0);
6071
6072 // If OpTy is not a single value, it may be a struct/union that we
6073 // can tile with integers.
6074 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6075 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6076 switch (BitSize) {
6077 default: break;
6078 case 1:
6079 case 8:
6080 case 16:
6081 case 32:
6082 case 64:
6083 case 128:
6084 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6085 break;
6086 }
6087 }
6088
6089 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6090 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6091 ArgNo++;
6092 }
6093 }
6094
6095 // If we have multiple alternative constraints, select the best alternative.
6096 if (!ConstraintOperands.empty()) {
6097 if (maCount) {
6098 unsigned bestMAIndex = 0;
6099 int bestWeight = -1;
6100 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6101 int weight = -1;
6102 unsigned maIndex;
6103 // Compute the sums of the weights for each alternative, keeping track
6104 // of the best (highest weight) one so far.
6105 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6106 int weightSum = 0;
6107 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6108 cIndex != eIndex; ++cIndex) {
6109 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6110 if (OpInfo.Type == InlineAsm::isClobber)
6111 continue;
6112
6113 // If this is an output operand with a matching input operand,
6114 // look up the matching input. If their types mismatch, e.g. one
6115 // is an integer, the other is floating point, or their sizes are
6116 // different, flag it as an maCantMatch.
6117 if (OpInfo.hasMatchingInput()) {
6118 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6119 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6120 if ((OpInfo.ConstraintVT.isInteger() !=
6121 Input.ConstraintVT.isInteger()) ||
6122 (OpInfo.ConstraintVT.getSizeInBits() !=
6123 Input.ConstraintVT.getSizeInBits())) {
6124 weightSum = -1; // Can't match.
6125 break;
6126 }
6127 }
6128 }
6129 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6130 if (weight == -1) {
6131 weightSum = -1;
6132 break;
6133 }
6134 weightSum += weight;
6135 }
6136 // Update best.
6137 if (weightSum > bestWeight) {
6138 bestWeight = weightSum;
6139 bestMAIndex = maIndex;
6140 }
6141 }
6142
6143 // Now select chosen alternative in each constraint.
6144 for (AsmOperandInfo &cInfo : ConstraintOperands)
6145 if (cInfo.Type != InlineAsm::isClobber)
6146 cInfo.selectAlternative(bestMAIndex);
6147 }
6148 }
6149
6150 // Check and hook up tied operands, choose constraint code to use.
6151 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6152 cIndex != eIndex; ++cIndex) {
6153 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6154
6155 // If this is an output operand with a matching input operand, look up the
6156 // matching input. If their types mismatch, e.g. one is an integer, the
6157 // other is floating point, or their sizes are different, flag it as an
6158 // error.
6159 if (OpInfo.hasMatchingInput()) {
6160 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6161
6162 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6163 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6164 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6165 OpInfo.ConstraintVT);
6166 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6167 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6168 Input.ConstraintVT);
6169 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6170 OpInfo.ConstraintVT.isFloatingPoint();
6171 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6172 Input.ConstraintVT.isFloatingPoint();
6173 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6174 (MatchRC.second != InputRC.second)) {
6175 report_fatal_error("Unsupported asm: input constraint"
6176 " with a matching output constraint of"
6177 " incompatible type!");
6178 }
6179 }
6180 }
6181 }
6182
6183 return ConstraintOperands;
6184}
6185
6186/// Return a number indicating our preference for chosing a type of constraint
6187/// over another, for the purpose of sorting them. Immediates are almost always
6188/// preferrable (when they can be emitted). A higher return value means a
6189/// stronger preference for one constraint type relative to another.
6190/// FIXME: We should prefer registers over memory but doing so may lead to
6191/// unrecoverable register exhaustion later.
6192/// https://github.com/llvm/llvm-project/issues/20571
6194 switch (CT) {
6197 return 4;
6200 return 3;
6202 return 2;
6204 return 1;
6206 return 0;
6207 }
6208 llvm_unreachable("Invalid constraint type");
6209}
6210
6211/// Examine constraint type and operand type and determine a weight value.
6212/// This object must already have been set up with the operand type
6213/// and the current alternative constraint selected.
6216 AsmOperandInfo &info, int maIndex) const {
6218 if (maIndex >= (int)info.multipleAlternatives.size())
6219 rCodes = &info.Codes;
6220 else
6221 rCodes = &info.multipleAlternatives[maIndex].Codes;
6222 ConstraintWeight BestWeight = CW_Invalid;
6223
6224 // Loop over the options, keeping track of the most general one.
6225 for (const std::string &rCode : *rCodes) {
6226 ConstraintWeight weight =
6227 getSingleConstraintMatchWeight(info, rCode.c_str());
6228 if (weight > BestWeight)
6229 BestWeight = weight;
6230 }
6231
6232 return BestWeight;
6233}
6234
6235/// Examine constraint type and operand type and determine a weight value.
6236/// This object must already have been set up with the operand type
6237/// and the current alternative constraint selected.
6240 AsmOperandInfo &info, const char *constraint) const {
6242 Value *CallOperandVal = info.CallOperandVal;
6243 // If we don't have a value, we can't do a match,
6244 // but allow it at the lowest weight.
6245 if (!CallOperandVal)
6246 return CW_Default;
6247 // Look at the constraint type.
6248 switch (*constraint) {
6249 case 'i': // immediate integer.
6250 case 'n': // immediate integer with a known value.
6251 if (isa<ConstantInt>(CallOperandVal))
6252 weight = CW_Constant;
6253 break;
6254 case 's': // non-explicit intregal immediate.
6255 if (isa<GlobalValue>(CallOperandVal))
6256 weight = CW_Constant;
6257 break;
6258 case 'E': // immediate float if host format.
6259 case 'F': // immediate float.
6260 if (isa<ConstantFP>(CallOperandVal))
6261 weight = CW_Constant;
6262 break;
6263 case '<': // memory operand with autodecrement.
6264 case '>': // memory operand with autoincrement.
6265 case 'm': // memory operand.
6266 case 'o': // offsettable memory operand
6267 case 'V': // non-offsettable memory operand
6268 weight = CW_Memory;
6269 break;
6270 case 'r': // general register.
6271 case 'g': // general register, memory operand or immediate integer.
6272 // note: Clang converts "g" to "imr".
6273 if (CallOperandVal->getType()->isIntegerTy())
6274 weight = CW_Register;
6275 break;
6276 case 'X': // any operand.
6277 default:
6278 weight = CW_Default;
6279 break;
6280 }
6281 return weight;
6282}
6283
6284/// If there are multiple different constraints that we could pick for this
6285/// operand (e.g. "imr") try to pick the 'best' one.
6286/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6287/// into seven classes:
6288/// Register -> one specific register
6289/// RegisterClass -> a group of regs
6290/// Memory -> memory
6291/// Address -> a symbolic memory reference
6292/// Immediate -> immediate values
6293/// Other -> magic values (such as "Flag Output Operands")
6294/// Unknown -> something we don't recognize yet and can't handle
6295/// Ideally, we would pick the most specific constraint possible: if we have
6296/// something that fits into a register, we would pick it. The problem here
6297/// is that if we have something that could either be in a register or in
6298/// memory that use of the register could cause selection of *other*
6299/// operands to fail: they might only succeed if we pick memory. Because of
6300/// this the heuristic we use is:
6301///
6302/// 1) If there is an 'other' constraint, and if the operand is valid for
6303/// that constraint, use it. This makes us take advantage of 'i'
6304/// constraints when available.
6305/// 2) Otherwise, pick the most general constraint present. This prefers
6306/// 'm' over 'r', for example.
6307///
6309 TargetLowering::AsmOperandInfo &OpInfo) const {
6310 ConstraintGroup Ret;
6311
6312 Ret.reserve(OpInfo.Codes.size())
6313 for (StringRef Code : OpInfo.Codes) {
6315
6316 // Indirect 'other' or 'immediate' constraints are not allowed.
6317 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6318 CType == TargetLowering::C_Register ||
6320 continue;
6321
6322 // Things with matching constraints can only be registers, per gcc
6323 // documentation. This mainly affects "g" constraints.
6324 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6325 continue;
6326
6327 Ret.emplace_back(Code, CType);
6328 }
6329
     // Sort the surviving candidates so that the highest-priority constraint
     // type comes first (the comparator below orders by descending priority).
6331 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6332 });
6333
6334 return Ret;
6335}
6336
6337/// If we have an immediate, see if we can lower it. Return true if we can,
6338/// false otherwise.
6340 SDValue Op, SelectionDAG *DAG,
6341 const TargetLowering &TLI) {
6342
6343 assert((P.second == TargetLowering::C_Other ||
6344 P.second == TargetLowering::C_Immediate) &&
6345 "need immediate or other");
6346
6347 if (!Op.getNode())
6348 return false;
6349
     // LowerAsmOperandForConstraint appends the lowered operands to ResultOps;
     // a non-empty result means the constraint could accept this immediate.
6350 std::vector<SDValue> ResultOps;
6351 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6352 return !ResultOps.empty();
6353}
6354
6355/// Determines the constraint code and constraint type to use for the specific
6356/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6358 SDValue Op,
6359 SelectionDAG *DAG) const {
6360 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6361
6362 // Single-letter constraints ('r') are very common.
6363 if (OpInfo.Codes.size() == 1) {
6364 OpInfo.ConstraintCode = OpInfo.Codes[0];
6365 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6366 } else {
6368 if (G.empty())
6369 return;
6370
     // Scan the leading C_Other/C_Immediate candidates and take the first one
     // whose immediate actually lowers; otherwise fall back to G[0].
6371 unsigned BestIdx = 0;
6372 for (const unsigned E = G.size();
6373 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6374 G[BestIdx].second == TargetLowering::C_Immediate);
6375 ++BestIdx) {
6376 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6377 break;
6378 // If we're out of constraints, just pick the first one.
6379 if (BestIdx + 1 == E) {
6380 BestIdx = 0;
6381 break;
6382 }
6383 }
6384
6385 OpInfo.ConstraintCode = G[BestIdx].first;
6386 OpInfo.ConstraintType = G[BestIdx].second;
6387 }
6388
6389 // 'X' matches anything.
6390 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6391 // Constants are handled elsewhere. For Functions, the type here is the
6392 // type of the result, which is not what we want to look at; leave them
6393 // alone.
6394 Value *v = OpInfo.CallOperandVal;
6395 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6396 return;
6397 }
6398
6399 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6400 OpInfo.ConstraintCode = "i";
6401 return;
6402 }
6403
6404 // Otherwise, try to resolve it to something we know about by looking at
6405 // the actual operand type.
6406 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6407 OpInfo.ConstraintCode = Repl;
6408 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6409 }
6410 }
6411}
6412
6413/// Given an exact SDIV by a constant, create a multiplication
6414/// with the multiplicative inverse of the constant.
6415/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6417 const SDLoc &dl, SelectionDAG &DAG,
6418 SmallVectorImpl<SDNode *> &Created) {
6419 SDValue Op0 = N->getOperand(0);
6420 SDValue Op1 = N->getOperand(1);
6421 EVT VT = N->getValueType(0);
6422 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6423 EVT ShSVT = ShVT.getScalarType();
6424
6425 bool UseSRA = false;
6426 SmallVector<SDValue, 16> Shifts, Factors;
6427
     // Per-element: split the divisor as (odd * 2^Shift); record the shift and
     // the odd factor's multiplicative inverse. Division by zero bails out.
6428 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6429 if (C->isZero())
6430 return false;
6431
6432 EVT CT = C->getValueType(0);
6433 APInt Divisor = C->getAPIntValue();
6434 unsigned Shift = Divisor.countr_zero();
6435 if (Shift) {
6436 Divisor.ashrInPlace(Shift);
6437 UseSRA = true;
6438 }
     // Divisor is odd here, so its inverse modulo 2^BW exists.
6439 APInt Factor = Divisor.multiplicativeInverse();
6440 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6441 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6442 return true;
6443 };
6444
6445 // Collect all magic values from the build vector.
6446 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6447 return SDValue();
6448
6449 SDValue Shift, Factor;
6450 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6451 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6452 Factor = DAG.getBuildVector(VT, dl, Factors);
6453 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6454 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6455 "Expected matchUnaryPredicate to return one element for scalable "
6456 "vectors");
6457 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6458 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6459 } else {
6460 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6461 Shift = Shifts[0];
6462 Factor = Factors[0];
6463 }
6464
     // x sdiv (odd * 2^Shift), exact == (x asr Shift) * inverse(odd).
6465 SDValue Res = Op0;
6466 if (UseSRA) {
6467 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6468 Created.push_back(Res.getNode());
6469 }
6470
6471 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6472}
6473
6474/// Given an exact UDIV by a constant, create a multiplication
6475/// with the multiplicative inverse of the constant.
6476/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6478 const SDLoc &dl, SelectionDAG &DAG,
6479 SmallVectorImpl<SDNode *> &Created) {
6480 EVT VT = N->getValueType(0);
6481 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6482 EVT ShSVT = ShVT.getScalarType();
6483
6484 bool UseSRL = false;
6485 SmallVector<SDValue, 16> Shifts, Factors;
6486
     // Per-element: split the divisor as (odd * 2^Shift); record the shift and
     // the odd factor's multiplicative inverse. Division by zero bails out.
6487 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6488 if (C->isZero())
6489 return false;
6490
6491 EVT CT = C->getValueType(0);
6492 APInt Divisor = C->getAPIntValue();
6493 unsigned Shift = Divisor.countr_zero();
6494 if (Shift) {
6495 Divisor.lshrInPlace(Shift);
6496 UseSRL = true;
6497 }
6498 // Calculate the multiplicative inverse modulo BW.
6499 APInt Factor = Divisor.multiplicativeInverse();
6500 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6501 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6502 return true;
6503 };
6504
6505 SDValue Op1 = N->getOperand(1);
6506
6507 // Collect all magic values from the build vector.
6508 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6509 return SDValue();
6510
6511 SDValue Shift, Factor;
6512 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6513 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6514 Factor = DAG.getBuildVector(VT, dl, Factors);
6515 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6516 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6517 "Expected matchUnaryPredicate to return one element for scalable "
6518 "vectors");
6519 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6520 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6521 } else {
6522 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6523 Shift = Shifts[0];
6524 Factor = Factors[0];
6525 }
6526
     // x udiv (odd * 2^Shift), exact == (x lsr Shift) * inverse(odd).
6527 SDValue Res = N->getOperand(0);
6528 if (UseSRL) {
6529 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6530 Created.push_back(Res.getNode());
6531 }
6532
6533 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6534}
6535
6537 SelectionDAG &DAG,
6538 SmallVectorImpl<SDNode *> &Created) const {
     // Default target hook: if integer division is cheap on this target,
     // return the node itself ("keep the SDIV as-is"). An empty SDValue
     // signals that this hook performed no lowering.
6539 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6540 if (isIntDivCheap(N->getValueType(0), Attr))
6541 return SDValue(N, 0); // Lower SDIV as SDIV
6542 return SDValue();
6543}
6544
6545SDValue
6547 SelectionDAG &DAG,
6548 SmallVectorImpl<SDNode *> &Created) const {
     // Default target hook, mirroring BuildSDIVPow2: keep the SREM when
     // integer division is cheap; otherwise report no custom lowering.
6549 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6550 if (isIntDivCheap(N->getValueType(0), Attr))
6551 return SDValue(N, 0); // Lower SREM as SREM
6552 return SDValue();
6553}
6554
6555/// Build sdiv by power-of-2 with conditional move instructions
6556/// Ref: "Hacker's Delight" by Henry Warren 10-1
6557/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6558/// bgez x, label
6559/// add x, x, 2**k-1
6560/// label:
6561/// sra res, x, k
6562/// neg res, res (when the divisor is negative)
6564 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6565 SmallVectorImpl<SDNode *> &Created) const {
6566 unsigned Lg2 = Divisor.countr_zero();
6567 EVT VT = N->getValueType(0);
6568
6569 SDLoc DL(N);
6570 SDValue N0 = N->getOperand(0);
6571 SDValue Zero = DAG.getConstant(0, DL, VT);
6572 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6573 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6574
6575 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
     // An ISD::SELECT is used instead of a branch, matching the "conditional
     // move" form in the header comment.
6576 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6577 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6578 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6579 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6580
6581 Created.push_back(Cmp.getNode());
6582 Created.push_back(Add.getNode());
6583 Created.push_back(CMov.getNode());
6584
6585 // Divide by pow2.
6586 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6587 DAG.getShiftAmountConstant(Lg2, VT, DL));
6588
6589 // If we're dividing by a positive value, we're done. Otherwise, we must
6590 // negate the result (0 - SRA).
6591 if (Divisor.isNonNegative())
6592 return SRA;
6593
6594 Created.push_back(SRA.getNode());
6595 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6596}
6597
6598/// Given an ISD::SDIV node expressing a divide by constant,
6599/// return a DAG expression to select that will generate the same value by
6600/// multiplying by a magic number.
6601/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6603 bool IsAfterLegalization,
6604 bool IsAfterLegalTypes,
6605 SmallVectorImpl<SDNode *> &Created) const {
6606 SDLoc dl(N);
6607
6608 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6609 if (N->getFlags().hasExact())
6610 return BuildExactSDIV(*this, N, dl, DAG, Created);
6611
6612 EVT VT = N->getValueType(0);
6613 EVT SVT = VT.getScalarType();
6614 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6615 EVT ShSVT = ShVT.getScalarType();
6616 unsigned EltBits = VT.getScalarSizeInBits();
     // MulVT stays the null EVT unless we decide to emulate the high-half
     // multiply via a full multiply in a wider type (checked as MulVT == EVT()).
6617 EVT MulVT;
6618
6619 // Check to see if we can do this.
6620 // FIXME: We should be more aggressive here.
6621 EVT QueryVT = VT;
6622 if (VT.isVector()) {
6623 // If the vector type will be legalized to a vector type with the same
6624 // element type, allow the transform before type legalization if MULHS or
6625 // SMUL_LOHI are supported.
6626 QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
6627 if (!QueryVT.isVector() ||
6629 return SDValue();
6630 } else if (!isTypeLegal(VT)) {
6631 // Limit this to simple scalars for now.
6632 if (!VT.isSimple())
6633 return SDValue();
6634
6635 // If this type will be promoted to a large enough type with a legal
6636 // multiply operation, we can go ahead and do this transform.
6638 return SDValue();
6639
6640 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6641 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6642 !isOperationLegal(ISD::MUL, MulVT))
6643 return SDValue();
6644 }
6645
6646 bool HasMULHS =
6647 isOperationLegalOrCustom(ISD::MULHS, QueryVT, IsAfterLegalization);
6648 bool HasSMUL_LOHI =
6649 isOperationLegalOrCustom(ISD::SMUL_LOHI, QueryVT, IsAfterLegalization);
6650
6651 if (isTypeLegal(VT) && !HasMULHS && !HasSMUL_LOHI && MulVT == EVT()) {
6652 // If type twice as wide legal, widen and use a mul plus a shift.
6653 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
6654 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6655 // custom lowered. This is very expensive so avoid it at all costs for
6656 // constant divisors.
6657 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6660 MulVT = WideVT;
6661 }
6662
6663 if (!HasMULHS && !HasSMUL_LOHI && MulVT == EVT())
6664 return SDValue();
6665
6666 // If we're after type legalization and SVT is not legal, use the
6667 // promoted type for creating constants to avoid creating nodes with
6668 // illegal types.
6669 if (IsAfterLegalTypes && VT.isVector()) {
6670 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6671 if (SVT.bitsLT(VT.getScalarType()))
6672 return SDValue();
6673 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6674 if (ShSVT.bitsLT(ShVT.getScalarType()))
6675 return SDValue();
6676 }
6677 const unsigned SVTBits = SVT.getSizeInBits();
6678
6679 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6680
     // Per-element: compute the signed magic constants and the add/sub fixup
     // factor (+1/-1/0) needed after the high multiply.
6681 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6682 if (C->isZero())
6683 return false;
6684 // Truncate the divisor to the target scalar type in case it was promoted
6685 // during type legalization.
6686 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6688 int NumeratorFactor = 0;
6689 int ShiftMask = -1;
6690
6691 if (Divisor.isOne() || Divisor.isAllOnes()) {
6692 // If d is +1/-1, we just multiply the numerator by +1/-1.
6693 NumeratorFactor = Divisor.getSExtValue();
6694 magics.Magic = 0;
6695 magics.ShiftAmount = 0;
6696 ShiftMask = 0;
6697 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6698 // If d > 0 and m < 0, add the numerator.
6699 NumeratorFactor = 1;
6700 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6701 // If d < 0 and m > 0, subtract the numerator.
6702 NumeratorFactor = -1;
6703 }
6704
6705 MagicFactors.push_back(
6706 DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT));
6707 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6708 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6709 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6710 return true;
6711 };
6712
6713 SDValue N0 = N->getOperand(0);
6714 SDValue N1 = N->getOperand(1);
6715
6716 // Collect the shifts / magic values from each element.
6717 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
6718 /*AllowTruncation=*/true))
6719 return SDValue();
6720
6721 SDValue MagicFactor, Factor, Shift, ShiftMask;
6722 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6723 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6724 Factor = DAG.getBuildVector(VT, dl, Factors);
6725 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6726 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6727 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6728 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6729 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6730 "Expected matchUnaryPredicate to return one element for scalable "
6731 "vectors");
6732 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6733 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6734 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6735 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6736 } else {
6737 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6738 MagicFactor = MagicFactors[0];
6739 Factor = Factors[0];
6740 Shift = Shifts[0];
6741 ShiftMask = ShiftMasks[0];
6742 }
6743
6744 // Multiply the numerator (operand 0) by the magic value.
     // Fallback order: MULHS, then the high half of SMUL_LOHI, then a full
     // sign-extended multiply in MulVT followed by a shift and truncate.
6745 auto GetMULHS = [&](SDValue X, SDValue Y) {
6746 if (HasMULHS)
6747 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6748 if (HasSMUL_LOHI) {
6749 SDValue LoHi =
6750 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6751 return LoHi.getValue(1);
6752 }
6753
6754 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6755 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6756 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6757 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6758 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6759 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6760 };
6761
6762 SDValue Q = GetMULHS(N0, MagicFactor);
6763 if (!Q)
6764 return SDValue();
6765
6766 Created.push_back(Q.getNode());
6767
6768 // (Optionally) Add/subtract the numerator using Factor.
6769 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6770 Created.push_back(Factor.getNode());
6771 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6772 Created.push_back(Q.getNode());
6773
6774 // Shift right algebraic by shift value.
6775 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6776 Created.push_back(Q.getNode());
6777
6778 // Extract the sign bit, mask it and add it to the quotient.
6779 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6780 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6781 Created.push_back(T.getNode());
6782 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6783 Created.push_back(T.getNode());
6784 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6785}
6786
6787/// Given an ISD::UDIV node expressing a divide by constant,
6788/// return a DAG expression to select that will generate the same value by
6789/// multiplying by a magic number.
6790/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6792 bool IsAfterLegalization,
6793 bool IsAfterLegalTypes,
6794 SmallVectorImpl<SDNode *> &Created) const {
6795 SDLoc dl(N);
6796
6797 // If the udiv has an 'exact' bit we can use a simpler lowering.
6798 if (N->getFlags().hasExact())
6799 return BuildExactUDIV(*this, N, dl, DAG, Created);
6800
6801 EVT VT = N->getValueType(0);
6802 EVT SVT = VT.getScalarType();
6803 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6804 EVT ShSVT = ShVT.getScalarType();
6805 unsigned EltBits = VT.getScalarSizeInBits();
     // MulVT stays the null EVT unless we decide to emulate the high-half
     // multiply via a full multiply in a wider type (checked as MulVT == EVT()).
6806 EVT MulVT;
6807
6808 // Check to see if we can do this.
6809 // FIXME: We should be more aggressive here.
6810 EVT QueryVT = VT;
6811 if (VT.isVector()) {
6812 // If the vector type will be legalized to a vector type with the same
6813 // element type, allow the transform before type legalization if MULHU or
6814 // UMUL_LOHI are supported.
6815 QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
6816 if (!QueryVT.isVector() ||
6818 return SDValue();
6819 } else if (!isTypeLegal(VT)) {
6820 // Limit this to simple scalars for now.
6821 if (!VT.isSimple())
6822 return SDValue();
6823
6824 // If this type will be promoted to a large enough type with a legal
6825 // multiply operation, we can go ahead and do this transform.
6827 return SDValue();
6828
6829 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6830 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6831 !isOperationLegal(ISD::MUL, MulVT))
6832 return SDValue();
6833 }
6834
6835 bool HasMULHU =
6836 isOperationLegalOrCustom(ISD::MULHU, QueryVT, IsAfterLegalization);
6837 bool HasUMUL_LOHI =
6838 isOperationLegalOrCustom(ISD::UMUL_LOHI, QueryVT, IsAfterLegalization);
6839
6840 if (isTypeLegal(VT) && !HasMULHU && !HasUMUL_LOHI && MulVT == EVT()) {
6841 // If type twice as wide legal, widen and use a mul plus a shift.
6842 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
6843 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6844 // custom lowered. This is very expensive so avoid it at all costs for
6845 // constant divisors.
6846 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6849 MulVT = WideVT;
6850 }
6851
6852 if (!HasMULHU && !HasUMUL_LOHI && MulVT == EVT())
6853 return SDValue();
6854
6855 SDValue N0 = N->getOperand(0);
6856 SDValue N1 = N->getOperand(1);
6857
6858 // Try to use leading zeros of the dividend to reduce the multiplier and
6859 // avoid expensive fixups.
6860 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6861
6862 // If we're after type legalization and SVT is not legal, use the
6863 // promoted type for creating constants to avoid creating nodes with
6864 // illegal types.
6865 if (IsAfterLegalTypes && VT.isVector()) {
6866 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6867 if (SVT.bitsLT(VT.getScalarType()))
6868 return SDValue();
6869 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6870 if (ShSVT.bitsLT(ShVT.getScalarType()))
6871 return SDValue();
6872 }
6873 const unsigned SVTBits = SVT.getSizeInBits();
6874
6875 // Allow i32 to be widened to i64 for uncooperative divisors if i64 MULHU or
6876 // UMUL_LOHI is supported.
6877 const EVT WideSVT = MVT::i64;
6878 const bool HasWideMULHU =
6879 VT == MVT::i32 &&
6880 isOperationLegalOrCustom(ISD::MULHU, WideSVT, IsAfterLegalization);
6881 const bool HasWideUMUL_LOHI =
6882 VT == MVT::i32 &&
6883 isOperationLegalOrCustom(ISD::UMUL_LOHI, WideSVT, IsAfterLegalization);
6884 const bool AllowWiden = (HasWideMULHU || HasWideUMUL_LOHI);
6885
6886 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6887 bool UseWiden = false;
6888 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6889
     // Per-element: compute unsigned magic constants (pre/post shifts, magic
     // multiplier, and the NPQ "add" fixup indicator).
6890 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6891 if (C->isZero())
6892 return false;
6893 // Truncate the divisor to the target scalar type in case it was promoted
6894 // during type legalization.
6895 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6896
6897 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6898
6899 // Magic algorithm doesn't work for division by 1. We need to emit a select
6900 // at the end.
6901 if (Divisor.isOne()) {
6902 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6903 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6904 } else {
6907 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()),
6908 /*AllowEvenDivisorOptimization=*/true,
6909 /*AllowWidenOptimization=*/AllowWiden);
6910
6911 if (magics.Widen) {
6912 UseWiden = true;
6913 MagicFactor = DAG.getConstant(magics.Magic, dl, WideSVT);
6914 } else {
6915 MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
6916 }
6917
6918 assert(magics.PreShift < Divisor.getBitWidth() &&
6919 "We shouldn't generate an undefined shift!");
6920 assert(magics.PostShift < Divisor.getBitWidth() &&
6921 "We shouldn't generate an undefined shift!");
6922 assert((!magics.IsAdd || magics.PreShift == 0) &&
6923 "Unexpected pre-shift");
6924 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6925 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6926 NPQFactor = DAG.getConstant(
6927 magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
6928 : APInt::getZero(SVTBits),
6929 dl, SVT);
6930 UseNPQ |= magics.IsAdd;
6931 UsePreShift |= magics.PreShift != 0;
6932 UsePostShift |= magics.PostShift != 0;
6933 }
6934
6935 PreShifts.push_back(PreShift);
6936 MagicFactors.push_back(MagicFactor);
6937 NPQFactors.push_back(NPQFactor);
6938 PostShifts.push_back(PostShift);
6939 return true;
6940 };
6941
6942 // Collect the shifts/magic values from each element.
6943 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
6944 /*AllowTruncation=*/true))
6945 return SDValue();
6946
6947 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6948 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6949 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6950 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6951 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6952 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6953 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6954 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6955 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6956 "Expected matchUnaryPredicate to return one for scalable vectors");
6957 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6958 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6959 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6960 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6961 } else {
6962 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6963 PreShift = PreShifts[0];
6964 MagicFactor = MagicFactors[0];
6965 PostShift = PostShifts[0];
6966 }
6967
6968 if (UseWiden) {
6969 // Compute: (WideSVT(x) * MagicFactor) >> WideSVTBits.
6970 SDValue WideN0 = DAG.getNode(ISD::ZERO_EXTEND, dl, WideSVT, N0);
6971
6972 // Perform WideSVTxWideSVT -> 2*WideSVT multiplication and extract high
6973 // WideSVT bits
6974 SDValue High;
6975 if (HasWideMULHU) {
6976 High = DAG.getNode(ISD::MULHU, dl, WideSVT, WideN0, MagicFactor);
6977 } else {
6978 assert(HasWideUMUL_LOHI);
6979 SDValue LoHi =
6980 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(WideSVT, WideSVT),
6981 WideN0, MagicFactor);
6982 High = LoHi.getValue(1);
6983 }
6984
6985 Created.push_back(High.getNode());
6986 return DAG.getNode(ISD::TRUNCATE, dl, VT, High);
6987 }
6988
6989 SDValue Q = N0;
6990 if (UsePreShift) {
6991 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6992 Created.push_back(Q.getNode());
6993 }
6994
     // Fallback order: MULHU, then the high half of UMUL_LOHI, then a full
     // zero-extended multiply in MulVT followed by a shift and truncate.
6995 auto GetMULHU = [&](SDValue X, SDValue Y) {
6996 if (HasMULHU)
6997 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6998 if (HasUMUL_LOHI) {
6999 SDValue LoHi =
7000 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
7001 return LoHi.getValue(1);
7002 }
7003
7004 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
7005 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
7006 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
7007 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
7008 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
7009 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
7010 };
7011
7012 // Multiply the numerator (operand 0) by the magic value.
7013 Q = GetMULHU(Q, MagicFactor);
7014 if (!Q)
7015 return SDValue();
7016
7017 Created.push_back(Q.getNode());
7018
7019 if (UseNPQ) {
7020 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
7021 Created.push_back(NPQ.getNode());
7022
7023 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
7024 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
7025 if (VT.isVector())
7026 NPQ = GetMULHU(NPQ, NPQFactor);
7027 else
7028 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
7029
7030 Created.push_back(NPQ.getNode());
7031
7032 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
7033 Created.push_back(Q.getNode());
7034 }
7035
7036 if (UsePostShift) {
7037 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
7038 Created.push_back(Q.getNode());
7039 }
7040
7041 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7042
     // Divisor-of-1 lanes were given undef magic values above; select the raw
     // numerator for any lane whose divisor equals 1.
7043 SDValue One = DAG.getConstant(1, dl, VT);
7044 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
7045 return DAG.getSelect(dl, VT, IsOne, N0, Q);
7046}
7047
7048/// If all values in Values that *don't* match the predicate are same 'splat'
7049/// value, then replace all values with that splat value.
7050/// Else, if AlternativeReplacement was provided, then replace all values that
7051/// do match predicate with AlternativeReplacement value.
7052static void
7054 std::function<bool(SDValue)> Predicate,
7055 SDValue AlternativeReplacement = SDValue()) {
7056 SDValue Replacement;
7057 // Is there a value for which the Predicate does *NOT* match? What is it?
7058 auto SplatValue = llvm::find_if_not(Values, Predicate);
7059 if (SplatValue != Values.end()) {
7060 // Does Values consist only of SplatValue's and values matching Predicate?
7061 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
7062 return Value == *SplatValue || Predicate(Value);
7063 })) // Then we shall replace values matching predicate with SplatValue.
7064 Replacement = *SplatValue;
7065 }
7066 if (!Replacement) {
7067 // Oops, we did not find the "baseline" splat value.
7068 if (!AlternativeReplacement)
7069 return; // Nothing to do.
7070 // Let's replace with provided value then.
7071 Replacement = AlternativeReplacement;
7072 }
7073 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
7074}
7075
7076/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
7077/// where the divisor and comparison target are constants,
7078/// return a DAG expression that will generate the same comparison result
7079/// using only multiplications, additions and shifts/rotations.
7080/// Ref: "Hacker's Delight" 10-17.
7081SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
7082 SDValue CompTargetNode,
7084 DAGCombinerInfo &DCI,
7085 const SDLoc &DL) const {
     // Delegate to prepareUREMEqFold; on success, register every node it
     // created with the DAG combiner worklist before returning the fold.
7087 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7088 DCI, DL, Built)) {
7089 for (SDNode *N : Built)
7090 DCI.AddToWorklist(N);
7091 return Folded;
7092 }
7093
7094 return SDValue();
7095}
7096
7097SDValue
7098TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
7099 SDValue CompTargetNode, ISD::CondCode Cond,
7100 DAGCombinerInfo &DCI, const SDLoc &DL,
7101 SmallVectorImpl<SDNode *> &Created) const {
7102 // fold (seteq/ne (urem N, D), C) ->
7103 // (setule/ugt (rotr (mul (sub N, C), P), K), Q)
7104 // - D must be constant, with D = D0 * 2^K where D0 is odd
7105 // - P is the multiplicative inverse of D0 modulo 2^W
7106 // - Q = floor(((2^W) - 1) / D)
7107 // where W is the width of the common type of N and D.
7108 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7109 "Only applicable for (in)equality comparisons.");
7110
7111 SelectionDAG &DAG = DCI.DAG;
7112
7113 EVT VT = REMNode.getValueType();
7114 EVT SVT = VT.getScalarType();
7115 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7116 EVT ShSVT = ShVT.getScalarType();
7117
7118 // If MUL is unavailable, we cannot proceed in any case.
7119 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7120 return SDValue();
7121
7122 bool ComparingWithAllZeros = true;
7123 bool AllComparisonsWithNonZerosAreTautological = true;
7124 bool HadTautologicalLanes = false;
7125 bool AllLanesAreTautological = true;
7126 bool HadEvenDivisor = false;
7127 bool AllDivisorsArePowerOfTwo = true;
7128 bool HadTautologicalInvertedLanes = false;
7129 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
7130
7131 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
7132 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7133 if (CDiv->isZero())
7134 return false;
7135
7136 const APInt &D = CDiv->getAPIntValue();
7137 const APInt &Cmp = CCmp->getAPIntValue();
7138
7139 ComparingWithAllZeros &= Cmp.isZero();
7140
7141 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7142 // if C2 is not less than C1, the comparison is always false.
7143 // But we will only be able to produce the comparison that will give the
7144 // opposive tautological answer. So this lane would need to be fixed up.
7145 bool TautologicalInvertedLane = D.ule(Cmp);
7146 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7147
7148 // If all lanes are tautological (either all divisors are ones, or divisor
7149 // is not greater than the constant we are comparing with),
7150 // we will prefer to avoid the fold.
7151 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7152 HadTautologicalLanes |= TautologicalLane;
7153 AllLanesAreTautological &= TautologicalLane;
7154
7155 // If we are comparing with non-zero, we need'll need to subtract said
7156 // comparison value from the LHS. But there is no point in doing that if
7157 // every lane where we are comparing with non-zero is tautological..
7158 if (!Cmp.isZero())
7159 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7160
7161 // Decompose D into D0 * 2^K
7162 unsigned K = D.countr_zero();
7163 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7164 APInt D0 = D.lshr(K);
7165
7166 // D is even if it has trailing zeros.
7167 HadEvenDivisor |= (K != 0);
7168 // D is a power-of-two if D0 is one.
7169 // If all divisors are power-of-two, we will prefer to avoid the fold.
7170 AllDivisorsArePowerOfTwo &= D0.isOne();
7171
7172 // P = inv(D0, 2^W)
7173 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7174 unsigned W = D.getBitWidth();
7175 APInt P = D0.multiplicativeInverse();
7176 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7177
7178 // Q = floor((2^W - 1) u/ D)
7179 // R = ((2^W - 1) u% D)
7180 APInt Q, R;
7182
7183 // If we are comparing with zero, then that comparison constant is okay,
7184 // else it may need to be one less than that.
7185 if (Cmp.ugt(R))
7186 Q -= 1;
7187
7189 "We are expecting that K is always less than all-ones for ShSVT");
7190
7191 // If the lane is tautological the result can be constant-folded.
7192 if (TautologicalLane) {
7193 // Set P and K amount to a bogus values so we can try to splat them.
7194 P = 0;
7195 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7196 // And ensure that comparison constant is tautological,
7197 // it will always compare true/false.
7198 Q.setAllBits();
7199 } else {
7200 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7201 }
7202
7203 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7204 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7205 return true;
7206 };
7207
7208 SDValue N = REMNode.getOperand(0);
7209 SDValue D = REMNode.getOperand(1);
7210
7211 // Collect the values from each element.
7212 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7213 return SDValue();
7214
7215 // If all lanes are tautological, the result can be constant-folded.
7216 if (AllLanesAreTautological)
7217 return SDValue();
7218
7219 // If this is a urem by a powers-of-two, avoid the fold since it can be
7220 // best implemented as a bit test.
7221 if (AllDivisorsArePowerOfTwo)
7222 return SDValue();
7223
7224 SDValue PVal, KVal, QVal;
7225 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7226 if (HadTautologicalLanes) {
7227 // Try to turn PAmts into a splat, since we don't care about the values
7228 // that are currently '0'. If we can't, just keep '0'`s.
7230 // Try to turn KAmts into a splat, since we don't care about the values
7231 // that are currently '-1'. If we can't, change them to '0'`s.
7233 DAG.getConstant(0, DL, ShSVT));
7234 }
7235
7236 PVal = DAG.getBuildVector(VT, DL, PAmts);
7237 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7238 QVal = DAG.getBuildVector(VT, DL, QAmts);
7239 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7240 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7241 "Expected matchBinaryPredicate to return one element for "
7242 "SPLAT_VECTORs");
7243 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7244 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7245 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7246 } else {
7247 PVal = PAmts[0];
7248 KVal = KAmts[0];
7249 QVal = QAmts[0];
7250 }
7251
7252 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7253 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7254 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7255 assert(CompTargetNode.getValueType() == N.getValueType() &&
7256 "Expecting that the types on LHS and RHS of comparisons match.");
7257 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7258 }
7259
7260 // (mul N, P)
7261 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7262 Created.push_back(Op0.getNode());
7263
7264 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7265 // divisors as a performance improvement, since rotating by 0 is a no-op.
7266 if (HadEvenDivisor) {
7267 // We need ROTR to do this.
7268 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7269 return SDValue();
7270 // UREM: (rotr (mul N, P), K)
7271 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7272 Created.push_back(Op0.getNode());
7273 }
7274
7275 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7276 SDValue NewCC =
7277 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7279 if (!HadTautologicalInvertedLanes)
7280 return NewCC;
7281
7282 // If any lanes previously compared always-false, the NewCC will give
7283 // always-true result for them, so we need to fixup those lanes.
7284 // Or the other way around for inequality predicate.
7285 assert(VT.isVector() && "Can/should only get here for vectors.");
7286 Created.push_back(NewCC.getNode());
7287
7288 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7289 // if C2 is not less than C1, the comparison is always false.
7290 // But we have produced the comparison that will give the
7291 // opposive tautological answer. So these lanes would need to be fixed up.
7292 SDValue TautologicalInvertedChannels =
7293 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7294 Created.push_back(TautologicalInvertedChannels.getNode());
7295
7296 // NOTE: we avoid letting illegal types through even if we're before legalize
7297 // ops – legalization has a hard time producing good code for this.
7298 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7299 // If we have a vector select, let's replace the comparison results in the
7300 // affected lanes with the correct tautological result.
7301 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7302 DL, SETCCVT, SETCCVT);
7303 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7304 Replacement, NewCC);
7305 }
7306
7307 // Else, we can just invert the comparison result in the appropriate lanes.
7308 //
7309 // NOTE: see the note above VSELECT above.
7310 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7311 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7312 TautologicalInvertedChannels);
7313
7314 return SDValue(); // Don't know how to lower.
7315}
7316
7317/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7318/// where the divisor is constant and the comparison target is zero,
7319/// return a DAG expression that will generate the same comparison result
7320/// using only multiplications, additions and shifts/rotations.
7321/// Ref: "Hacker's Delight" 10-17.
7322SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7323 SDValue CompTargetNode,
7325 DAGCombinerInfo &DCI,
7326 const SDLoc &DL) const {
7328 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7329 DCI, DL, Built)) {
7330 assert(Built.size() <= 7 && "Max size prediction failed.");
7331 for (SDNode *N : Built)
7332 DCI.AddToWorklist(N);
7333 return Folded;
7334 }
7335
7336 return SDValue();
7337}
7338
7339SDValue
7340TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7341 SDValue CompTargetNode, ISD::CondCode Cond,
7342 DAGCombinerInfo &DCI, const SDLoc &DL,
7343 SmallVectorImpl<SDNode *> &Created) const {
7344 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7345 // Fold:
7346 // (seteq/ne (srem N, D), 0)
7347 // To:
7348 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7349 //
7350 // - D must be constant, with D = D0 * 2^K where D0 is odd
7351 // - P is the multiplicative inverse of D0 modulo 2^W
7352 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7353 // - Q = floor((2 * A) / (2^K))
7354 // where W is the width of the common type of N and D.
7355 //
7356 // When D is a power of two (and thus D0 is 1), the normal
7357 // formula for A and Q don't apply, because the derivation
7358 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7359 // does not apply. This specifically fails when N = INT_MIN.
7360 //
7361 // Instead, for power-of-two D, we use:
7362 // - A = 0
7363 // | -> No offset needed. We're effectively treating it the same as urem.
7364 // - Q = 2^(W-K) - 1
7365 // |-> Test that the top K bits are zero after rotation
7366 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7367 "Only applicable for (in)equality comparisons.");
7368
7369 SelectionDAG &DAG = DCI.DAG;
7370
7371 EVT VT = REMNode.getValueType();
7372 EVT SVT = VT.getScalarType();
7373 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7374 EVT ShSVT = ShVT.getScalarType();
7375
7376 // If we are after ops legalization, and MUL is unavailable, we can not
7377 // proceed.
7378 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7379 return SDValue();
7380
7381 // TODO: Could support comparing with non-zero too.
7382 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7383 if (!CompTarget || !CompTarget->isZero())
7384 return SDValue();
7385
7386 bool HadOneDivisor = false;
7387 bool AllDivisorsAreOnes = true;
7388 bool HadEvenDivisor = false;
7389 bool AllDivisorsArePowerOfTwo = true;
7390 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7391
7392 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7393 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7394 if (C->isZero())
7395 return false;
7396
7397 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7398
7399 // WARNING: this fold is only valid for positive divisors!
7400 // `rem %X, -C` is equivalent to `rem %X, C`
7401 APInt D = C->getAPIntValue().abs();
7402
7403 // If all divisors are ones, we will prefer to avoid the fold.
7404 HadOneDivisor |= D.isOne();
7405 AllDivisorsAreOnes &= D.isOne();
7406
7407 // Decompose D into D0 * 2^K
7408 unsigned K = D.countr_zero();
7409 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7410 APInt D0 = D.lshr(K);
7411
7412 // D is even if it has trailing zeros.
7413 HadEvenDivisor |= (K != 0);
7414
7415 // D is a power-of-two if D0 is one. This includes INT_MIN.
7416 // If all divisors are power-of-two, we will prefer to avoid the fold.
7417 AllDivisorsArePowerOfTwo &= D0.isOne();
7418
7419 // P = inv(D0, 2^W)
7420 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7421 unsigned W = D.getBitWidth();
7422 APInt P = D0.multiplicativeInverse();
7423 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7424
7425 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7426 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7427 A.clearLowBits(K);
7428
7429 // Q = floor((2 * A) / (2^K))
7430 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7431
7433 "We are expecting that A is always less than all-ones for SVT");
7435 "We are expecting that K is always less than all-ones for ShSVT");
7436
7437 // If D was a power of two, apply the alternate constant derivation.
7438 if (D0.isOne()) {
7439 // A = 0
7440 A = APInt(W, 0);
7441 // - Q = 2^(W-K) - 1
7442 Q = APInt::getLowBitsSet(W, W - K);
7443 }
7444
7445 // If the divisor is 1 the result can be constant-folded.
7446 if (D.isOne()) {
7447 // Set P, A and K to a bogus values so we can try to splat them.
7448 P = 0;
7449 A.setAllBits();
7450 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7451
7452 // x ?% 1 == 0 <--> true <--> x u<= -1
7453 Q.setAllBits();
7454 } else {
7455 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7456 }
7457
7458 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7459 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7460 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7461 return true;
7462 };
7463
7464 SDValue N = REMNode.getOperand(0);
7465 SDValue D = REMNode.getOperand(1);
7466
7467 // Collect the values from each element.
7468 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7469 return SDValue();
7470
7471 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7472 if (AllDivisorsAreOnes)
7473 return SDValue();
7474
7475 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7476 // since it can be best implemented as a bit test.
7477 if (AllDivisorsArePowerOfTwo)
7478 return SDValue();
7479
7480 SDValue PVal, AVal, KVal, QVal;
7481 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7482 if (HadOneDivisor) {
7483 // Try to turn PAmts into a splat, since we don't care about the values
7484 // that are currently '0'. If we can't, just keep '0'`s.
7486 // Try to turn AAmts into a splat, since we don't care about the
7487 // values that are currently '-1'. If we can't, change them to '0'`s.
7489 DAG.getConstant(0, DL, SVT));
7490 // Try to turn KAmts into a splat, since we don't care about the values
7491 // that are currently '-1'. If we can't, change them to '0'`s.
7493 DAG.getConstant(0, DL, ShSVT));
7494 }
7495
7496 PVal = DAG.getBuildVector(VT, DL, PAmts);
7497 AVal = DAG.getBuildVector(VT, DL, AAmts);
7498 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7499 QVal = DAG.getBuildVector(VT, DL, QAmts);
7500 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7501 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7502 QAmts.size() == 1 &&
7503 "Expected matchUnaryPredicate to return one element for scalable "
7504 "vectors");
7505 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7506 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7507 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7508 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7509 } else {
7510 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7511 PVal = PAmts[0];
7512 AVal = AAmts[0];
7513 KVal = KAmts[0];
7514 QVal = QAmts[0];
7515 }
7516
7517 // (mul N, P)
7518 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7519 Created.push_back(Op0.getNode());
7520
7521 // We need ADD to do this.
7522 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7523 return SDValue();
7524
7525 // (add (mul N, P), A)
7526 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7527 Created.push_back(Op0.getNode());
7528
7529 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7530 // divisors as a performance improvement, since rotating by 0 is a no-op.
7531 if (HadEvenDivisor) {
7532 // We need ROTR to do this.
7533 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7534 return SDValue();
7535 // SREM: (rotr (add (mul N, P), A), K)
7536 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7537 Created.push_back(Op0.getNode());
7538 }
7539
7540 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7541 return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7543}
7544
7546 const DenormalMode &Mode,
7547 SDNodeFlags Flags) const {
7548 SDLoc DL(Op);
7549 EVT VT = Op.getValueType();
7550 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7551 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7552
7553 // This is specifically a check for the handling of denormal inputs, not the
7554 // result.
7555 if (Mode.Input == DenormalMode::PreserveSign ||
7556 Mode.Input == DenormalMode::PositiveZero) {
7557 // Test = X == 0.0
7558 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ, /*Chain=*/{},
7559 /*Signaling=*/false, Flags);
7560 }
7561
7562 // Testing it with denormal inputs to avoid wrong estimate.
7563 //
7564 // Test = fabs(X) < SmallestNormal
7565 const fltSemantics &FltSem = VT.getFltSemantics();
7566 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7567 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7568 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op, Flags);
7569 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT, /*Chain=*/{},
7570 /*Signaling=*/false, Flags);
7571}
7572
7574 bool LegalOps, bool OptForSize,
7576 unsigned Depth) const {
7577 // fneg is removable even if it has multiple uses.
7578 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7580 return Op.getOperand(0);
7581 }
7582
7583 // Don't recurse exponentially.
7585 return SDValue();
7586
7587 // Pre-increment recursion depth for use in recursive calls.
7588 ++Depth;
7589 const SDNodeFlags Flags = Op->getFlags();
7590 EVT VT = Op.getValueType();
7591 unsigned Opcode = Op.getOpcode();
7592
7593 // Don't allow anything with multiple uses unless we know it is free.
7594 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7595 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7596 isFPExtFree(VT, Op.getOperand(0).getValueType());
7597 if (!IsFreeExtend)
7598 return SDValue();
7599 }
7600
7601 auto RemoveDeadNode = [&](SDValue N) {
7602 if (N && N.getNode()->use_empty())
7603 DAG.RemoveDeadNode(N.getNode());
7604 };
7605
7606 SDLoc DL(Op);
7607
7608 // Because getNegatedExpression can delete nodes we need a handle to keep
7609 // temporary nodes alive in case the recursion manages to create an identical
7610 // node.
7611 std::list<HandleSDNode> Handles;
7612
7613 switch (Opcode) {
7614 case ISD::ConstantFP: {
7615 // Don't invert constant FP values after legalization unless the target says
7616 // the negated constant is legal.
7617 bool IsOpLegal =
7619 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7620 OptForSize);
7621
7622 if (LegalOps && !IsOpLegal)
7623 break;
7624
7625 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7626 V.changeSign();
7627 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7628
7629 // If we already have the use of the negated floating constant, it is free
7630 // to negate it even it has multiple uses.
7631 if (!Op.hasOneUse() && CFP.use_empty())
7632 break;
7634 return CFP;
7635 }
7636 case ISD::SPLAT_VECTOR: {
7637 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7638 SDValue X = Op.getOperand(0);
7640 break;
7641
7642 SDValue NegX = getCheaperNegatedExpression(X, DAG, LegalOps, OptForSize);
7643 if (!NegX)
7644 break;
7646 return DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, NegX);
7647 }
7648 case ISD::BUILD_VECTOR: {
7649 // Only permit BUILD_VECTOR of constants.
7650 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7651 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7652 }))
7653 break;
7654
7655 bool IsOpLegal =
7658 llvm::all_of(Op->op_values(), [&](SDValue N) {
7659 return N.isUndef() ||
7660 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7661 OptForSize);
7662 });
7663
7664 if (LegalOps && !IsOpLegal)
7665 break;
7666
7668 for (SDValue C : Op->op_values()) {
7669 if (C.isUndef()) {
7670 Ops.push_back(C);
7671 continue;
7672 }
7673 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7674 V.changeSign();
7675 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7676 }
7678 return DAG.getBuildVector(VT, DL, Ops);
7679 }
7680 case ISD::FADD: {
7681 if (!Flags.hasNoSignedZeros())
7682 break;
7683
7684 // After operation legalization, it might not be legal to create new FSUBs.
7685 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7686 break;
7687 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7688
7689 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7691 SDValue NegX =
7692 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7693 // Prevent this node from being deleted by the next call.
7694 if (NegX)
7695 Handles.emplace_back(NegX);
7696
7697 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7699 SDValue NegY =
7700 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7701
7702 // We're done with the handles.
7703 Handles.clear();
7704
7705 // Negate the X if its cost is less or equal than Y.
7706 if (NegX && (CostX <= CostY)) {
7707 Cost = CostX;
7708 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7709 if (NegY != N)
7710 RemoveDeadNode(NegY);
7711 return N;
7712 }
7713
7714 // Negate the Y if it is not expensive.
7715 if (NegY) {
7716 Cost = CostY;
7717 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7718 if (NegX != N)
7719 RemoveDeadNode(NegX);
7720 return N;
7721 }
7722 break;
7723 }
7724 case ISD::FSUB: {
7725 // We can't turn -(A-B) into B-A when we honor signed zeros.
7726 if (!Flags.hasNoSignedZeros())
7727 break;
7728
7729 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7730 // fold (fneg (fsub 0, Y)) -> Y
7731 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7732 if (C->isZero()) {
7734 return Y;
7735 }
7736
7737 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7739 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7740 }
7741 case ISD::FMUL:
7742 case ISD::FDIV: {
7743 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7744
7745 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7747 SDValue NegX =
7748 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7749 // Prevent this node from being deleted by the next call.
7750 if (NegX)
7751 Handles.emplace_back(NegX);
7752
7753 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7755 SDValue NegY =
7756 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7757
7758 // We're done with the handles.
7759 Handles.clear();
7760
7761 // Negate the X if its cost is less or equal than Y.
7762 if (NegX && (CostX <= CostY)) {
7763 Cost = CostX;
7764 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7765 if (NegY != N)
7766 RemoveDeadNode(NegY);
7767 return N;
7768 }
7769
7770 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7771 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7772 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7773 break;
7774
7775 // Negate the Y if it is not expensive.
7776 if (NegY) {
7777 Cost = CostY;
7778 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7779 if (NegX != N)
7780 RemoveDeadNode(NegX);
7781 return N;
7782 }
7783 break;
7784 }
7785 case ISD::FMA:
7786 case ISD::FMULADD:
7787 case ISD::FMAD: {
7788 if (!Flags.hasNoSignedZeros())
7789 break;
7790
7791 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7793 SDValue NegZ =
7794 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7795 // Give up if fail to negate the Z.
7796 if (!NegZ)
7797 break;
7798
7799 // Prevent this node from being deleted by the next two calls.
7800 Handles.emplace_back(NegZ);
7801
7802 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7804 SDValue NegX =
7805 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7806 // Prevent this node from being deleted by the next call.
7807 if (NegX)
7808 Handles.emplace_back(NegX);
7809
7810 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7812 SDValue NegY =
7813 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7814
7815 // We're done with the handles.
7816 Handles.clear();
7817
7818 // Negate the X if its cost is less or equal than Y.
7819 if (NegX && (CostX <= CostY)) {
7820 Cost = std::min(CostX, CostZ);
7821 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7822 if (NegY != N)
7823 RemoveDeadNode(NegY);
7824 return N;
7825 }
7826
7827 // Negate the Y if it is not expensive.
7828 if (NegY) {
7829 Cost = std::min(CostY, CostZ);
7830 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7831 if (NegX != N)
7832 RemoveDeadNode(NegX);
7833 return N;
7834 }
7835 break;
7836 }
7837
7838 case ISD::FP_EXTEND:
7839 case ISD::FSIN:
7840 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7841 OptForSize, Cost, Depth))
7842 return DAG.getNode(Opcode, DL, VT, NegV);
7843 break;
7844 case ISD::FP_ROUND:
7845 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7846 OptForSize, Cost, Depth))
7847 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7848 break;
7849 case ISD::SELECT:
7850 case ISD::VSELECT: {
7851 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7852 // iff at least one cost is cheaper and the other is neutral/cheaper
7853 SDValue LHS = Op.getOperand(1);
7855 SDValue NegLHS =
7856 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7857 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7858 RemoveDeadNode(NegLHS);
7859 break;
7860 }
7861
7862 // Prevent this node from being deleted by the next call.
7863 Handles.emplace_back(NegLHS);
7864
7865 SDValue RHS = Op.getOperand(2);
7867 SDValue NegRHS =
7868 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7869
7870 // We're done with the handles.
7871 Handles.clear();
7872
7873 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7874 (CostLHS != NegatibleCost::Cheaper &&
7875 CostRHS != NegatibleCost::Cheaper)) {
7876 RemoveDeadNode(NegLHS);
7877 RemoveDeadNode(NegRHS);
7878 break;
7879 }
7880
7881 Cost = std::min(CostLHS, CostRHS);
7882 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7883 }
7884 }
7885
7886 return SDValue();
7887}
7888
7889//===----------------------------------------------------------------------===//
7890// Legalization Utilities
7891//===----------------------------------------------------------------------===//
7892
7893bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7894 SDValue LHS, SDValue RHS,
7896 EVT HiLoVT, SelectionDAG &DAG,
7897 MulExpansionKind Kind, SDValue LL,
7898 SDValue LH, SDValue RL, SDValue RH) const {
7899 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7900 Opcode == ISD::SMUL_LOHI);
7901
7902 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7904 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7906 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7908 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7910
7911 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7912 return false;
7913
7914 unsigned OuterBitSize = VT.getScalarSizeInBits();
7915 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7916
7917 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7918 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7919 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7920
7921 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7922 bool Signed) -> bool {
7923 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7924 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7925 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7926 Hi = Lo.getValue(1);
7927 return true;
7928 }
7929 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7930 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7931 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7932 return true;
7933 }
7934 return false;
7935 };
7936
7937 SDValue Lo, Hi;
7938
7939 if (!LL.getNode() && !RL.getNode() &&
7941 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7942 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7943 }
7944
7945 if (!LL.getNode())
7946 return false;
7947
7948 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7949 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7950 DAG.MaskedValueIsZero(RHS, HighMask)) {
7951 // The inputs are both zero-extended.
7952 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7953 Result.push_back(Lo);
7954 Result.push_back(Hi);
7955 if (Opcode != ISD::MUL) {
7956 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7957 Result.push_back(Zero);
7958 Result.push_back(Zero);
7959 }
7960 return true;
7961 }
7962 }
7963
7964 if (!VT.isVector() && Opcode == ISD::MUL &&
7965 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7966 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7967 // The input values are both sign-extended.
7968 // TODO non-MUL case?
7969 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7970 Result.push_back(Lo);
7971 Result.push_back(Hi);
7972 return true;
7973 }
7974 }
7975
7976 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7977 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7978
7979 if (!LH.getNode() && !RH.getNode() &&
7982 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7983 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7984 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7985 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7986 }
7987
7988 if (!LH.getNode())
7989 return false;
7990
7991 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7992 return false;
7993
7994 Result.push_back(Lo);
7995
7996 if (Opcode == ISD::MUL) {
7997 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7998 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7999 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
8000 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
8001 Result.push_back(Hi);
8002 return true;
8003 }
8004
8005 // Compute the full width result.
8006 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
8007 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
8008 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8009 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
8010 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
8011 };
8012
8013 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8014 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
8015 return false;
8016
8017 // This is effectively the add part of a multiply-add of half-sized operands,
8018 // so it cannot overflow.
8019 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8020
8021 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
8022 return false;
8023
8024 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
8025 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8026
8027 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
8029 if (UseGlue)
8030 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
8031 Merge(Lo, Hi));
8032 else
8033 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
8034 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
8035
8036 SDValue Carry = Next.getValue(1);
8037 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8038 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8039
8040 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
8041 return false;
8042
8043 if (UseGlue)
8044 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
8045 Carry);
8046 else
8047 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
8048 Zero, Carry);
8049
8050 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8051
8052 if (Opcode == ISD::SMUL_LOHI) {
8053 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8054 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
8055 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
8056
8057 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8058 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
8059 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
8060 }
8061
8062 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8063 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8064 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8065 return true;
8066}
8067
8069 SelectionDAG &DAG, MulExpansionKind Kind,
8070 SDValue LL, SDValue LH, SDValue RL,
8071 SDValue RH) const {
8073 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
8074 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
8075 DAG, Kind, LL, LH, RL, RH);
8076 if (Ok) {
8077 assert(Result.size() == 2);
8078 Lo = Result[0];
8079 Hi = Result[1];
8080 }
8081 return Ok;
8082}
8083
8084// Optimize unsigned division or remainder by constants for types twice as large
8085// as a legal VT.
8086//
8087// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8088// can be computed
8089// as:
8090// Sum = __builtin_uadd_overflow(Lo, High, &Sum);
8091// Remainder = Sum % Constant;
8092//
8093// If (1 << (BitWidth / 2)) % Constant != 1, we can search for a smaller value
8094// W such that W != (BitWidth / 2) and (1 << W) % Constant == 1. We can break
8095// High:Low into 3 chunks of W bits and compute remainder as
8096// Sum = Chunk0 + Chunk1 + Chunk2;
8097// Remainder = Sum % Constant;
8098//
8099// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8100//
8101// For division, we can compute the remainder using the algorithm described
8102// above, subtract it from the dividend to get an exact multiple of Constant.
8103 // Then multiply that exact multiple by the multiplicative inverse modulo
8104// (1 << (BitWidth / 2)) to get the quotient.
8105
8106// If Constant is even, we can shift right the dividend and the divisor by the
8107// number of trailing zeros in Constant before applying the remainder algorithm.
8108// If we're after the quotient, we can subtract this value from the shifted
8109// dividend and multiply by the multiplicative inverse of the shifted divisor.
8110// If we want the remainder, we shift the value left by the number of trailing
8111// zeros and add the bits that were shifted out of the dividend.
8112 bool TargetLowering::expandUDIVREMByConstantViaUREMDecomposition(
8113     SDNode *N, APInt Divisor, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
8114     SelectionDAG &DAG, SDValue LL, SDValue LH) const {
8115   unsigned Opcode = N->getOpcode();
8116   EVT VT = N->getValueType(0);
8117
8118   unsigned BitWidth = Divisor.getBitWidth();
8119   unsigned HBitWidth = BitWidth / 2;
// Sanity check (continuation of an assert whose opening line is not visible
// in this listing): HiLoVT must be exactly half the scalar width of VT.
8121          HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8122
8123   // If the divisor is even, shift it until it becomes odd.
8124   unsigned TrailingZeros = 0;
8125   if (!Divisor[0]) {
8126     TrailingZeros = Divisor.countr_zero();
8127     Divisor.lshrInPlace(TrailingZeros);
8128   }
8129
8130   // After removing trailing zeros, the divisor needs to be less than
8131   // (1 << HBitWidth).
8132   APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8133   if (Divisor.uge(HalfMaxPlus1))
8134     return false;
8135
8136   // Look for the largest chunk width W such that (1 << W) % Divisor == 1 or
8137   // (1 << W) % Divisor == -1.
// Candidate widths are scanned from HBitWidth down to HBitWidth/2 + 1 so the
// input splits into at most ~3-4 chunks.
8138   unsigned BestChunkWidth = 0, AltChunkWidth = 0;
8139   for (unsigned I = HBitWidth, E = HBitWidth / 2; I > E; --I) {
8140     // Skip HBitWidth-1, it doesn't have enough bits for carries.
8141     if (I == HBitWidth - 1)
8142       continue;
8143
8144     APInt Mod = APInt::getOneBitSet(Divisor.getBitWidth(), I).urem(Divisor);
8145
8146     if (Mod.isOne()) {
8147       BestChunkWidth = I;
8148       break;
8149     }
8150
8151     // We have an alternate strategy for Remainder == Divisor - 1.
8152     // FIXME: Support HBitWidth.
8153     if (I != HBitWidth && Mod == Divisor - 1)
8154       AltChunkWidth = I;
8155   }
8156
8157   bool Alternate = false;
8158   if (!BestChunkWidth) {
8159     if (!AltChunkWidth)
8160       return false;
8161     Alternate = true;
8162     BestChunkWidth = AltChunkWidth;
8163   }
8164
8165   SDLoc dl(N);
8166
8167   assert(!LL == !LH && "Expected both input halves or no input halves!");
8168   if (!LL)
8169     std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8170
8171   bool HasFSHR = isOperationLegal(ISD::FSHR, HiLoVT);
8172
// Funnel-shift-right of the (Lo, Hi) pair by ShiftAmt; emulated with
// SRL/SHL + OR when FSHR is not legal on HiLoVT.
8173   auto GetFSHR = [&](SDValue Lo, SDValue Hi, unsigned ShiftAmt) {
8174     assert(ShiftAmt > 0 && ShiftAmt < HBitWidth);
8175     if (HasFSHR)
8176       return DAG.getNode(ISD::FSHR, dl, HiLoVT, Hi, Lo,
8177                          DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8178     return DAG.getNode(
8179         ISD::OR, dl, HiLoVT,
8180         DAG.getNode(ISD::SRL, dl, HiLoVT, Lo,
8181                     DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl)),
8182         DAG.getNode(
8183             ISD::SHL, dl, HiLoVT, Hi,
8184             DAG.getShiftAmountConstant(HBitWidth - ShiftAmt, HiLoVT, dl)));
8185   };
8186
8187   // Helper to perform a right shift on a 128-bit value split into two halves.
8188   // Handles shifts >= HBitWidth by moving Hi to Lo and shifting Hi.
8189   auto ShiftRight = [&](SDValue &Lo, SDValue &Hi, unsigned ShiftAmt) {
8190     if (ShiftAmt == 0)
8191       return;
8192     if (ShiftAmt < HBitWidth) {
8193       Lo = GetFSHR(Lo, Hi, ShiftAmt);
8194       Hi = DAG.getNode(ISD::SRL, dl, HiLoVT, Hi,
8195                        DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8196     } else if (ShiftAmt == HBitWidth) {
8197       Lo = Hi;
8198       Hi = DAG.getConstant(0, dl, HiLoVT);
8199     } else {
8200       Lo = DAG.getNode(
8201           ISD::SRL, dl, HiLoVT, Hi,
8202           DAG.getShiftAmountConstant(ShiftAmt - HBitWidth, HiLoVT, dl));
8203       Hi = DAG.getConstant(0, dl, HiLoVT);
8204     }
8205   };
8206
8207   // Shift the input by the number of TrailingZeros in the divisor. The
8208   // shifted out bits will be added to the remainder later.
8209   SDValue PartialRemL, PartialRemH;
8210   if (TrailingZeros && Opcode != ISD::UDIV) {
8211     // Save the shifted off bits if we need the remainder.
8212     if (TrailingZeros < HBitWidth) {
8213       APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8214       PartialRemL = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8215                                 DAG.getConstant(Mask, dl, HiLoVT));
8216     } else if (TrailingZeros == HBitWidth) {
8217       // All of LL is part of the remainder.
8218       PartialRemL = LL;
8219     } else {
8220       // TrailingZeros > HBitWidth: LL and part of LH are the remainder.
8221       PartialRemL = LL;
8222       APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros - HBitWidth);
8223       PartialRemH = DAG.getNode(ISD::AND, dl, HiLoVT, LH,
8224                                 DAG.getConstant(Mask, dl, HiLoVT));
8225     }
8226   }
8227
8228   SDValue Sum;
8229   // If BestChunkWidth is HBitWidth add low and high half. If there is a carry
8230   // out, add that to the final sum.
8231   if (BestChunkWidth == HBitWidth) {
8232     assert(!Alternate);
8233     // Shift LH:LL right if there were trailing zeros in the divisor.
8234     ShiftRight(LL, LH, TrailingZeros);
8235
8236     // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8237     EVT SetCCType =
8238         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
// (Condition line selecting the UADDO path is not visible in this listing.)
8240       SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8241       Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
// Fold the carry-out of LL + LH back into the sum; the result stays in range
// because BestChunkWidth == HBitWidth implies 2^HBitWidth % Divisor == 1.
8242       Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8243                         DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8244     } else {
8245       Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
// Unsigned overflow of LL + LH is detected by Sum < LL.
8246       SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8247       // If the boolean for the target is 0 or 1, we can add the setcc result
8248       // directly.
8249       if (getBooleanContents(HiLoVT) ==
8251         Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8252       else
8253         Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8254                               DAG.getConstant(0, dl, HiLoVT));
8255       Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8256     }
8257   } else {
8258     // Otherwise split into multiple chunks and add them together. We chose
8259     // BestChunkWidth so that the sum will not overflow.
8260     SDValue Mask = DAG.getConstant(
8261         APInt::getLowBitsSet(HBitWidth, BestChunkWidth), dl, HiLoVT);
8262
8263     for (unsigned I = 0; I < BitWidth - TrailingZeros; I += BestChunkWidth) {
8264       // If there were trailing zeros in the divisor, increase the shift amount.
8265       unsigned Shift = I + TrailingZeros;
8266       SDValue Chunk;
8267       if (Shift == 0)
8268         Chunk = LL;
8269       else if (Shift >= HBitWidth)
8270         Chunk = DAG.getNode(
8271             ISD::SRL, dl, HiLoVT, LH,
8272             DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, dl));
8273       else
8274         Chunk = GetFSHR(LL, LH, Shift);
8275       // If we're on the last chunk, we don't need an AND.
8276       if (I + BestChunkWidth < BitWidth - TrailingZeros)
8277         Chunk = DAG.getNode(ISD::AND, dl, HiLoVT, Chunk, Mask);
8278       if (!Sum) {
8279         Sum = Chunk;
8280       } else {
8281         // For Alternate, we need to subtract odd chunks.
8282         unsigned ChunkNum = I / BestChunkWidth;
8283         unsigned Opc = (Alternate && (ChunkNum % 2) != 0) ? ISD::SUB : ISD::ADD;
8284         Sum = DAG.getNode(Opc, dl, HiLoVT, Sum, Chunk);
8285       }
8286     }
8287
8288     // For Alternate, the sum may be negative, but we need a positive sum. We
8289     // can increase it by a multiple of the divisor to make it positive. For 3
8290     // chunks the largest negative value is -(2^BestChunkWidth - 1). For 4
8291     // chunks, it's 2*-(2^BestChunkWidth - 1). We know that 2^BestChunkWidth + 1
8292     // is a multiple of the divisor. Add that 1 or 2 times to make the sum
8293     // positive.
8294     if (Alternate) {
8295       unsigned NumChunks = divideCeil(BitWidth - TrailingZeros, BestChunkWidth);
8296       assert(NumChunks <= 4);
8297
8298       APInt Adjust = APInt::getOneBitSet(HBitWidth, BestChunkWidth);
8299       Adjust.setBit(0);
8300       // If there are 4 chunks, we need to adjust twice.
8301       if (NumChunks == 4)
8302         Adjust <<= 1;
8303       Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum,
8304                         DAG.getConstant(Adjust, dl, HiLoVT));
8305     }
8306   }
8307
8308   // Perform a HiLoVT urem on the Sum using truncated divisor.
8309   SDValue RemL =
8310       DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8311                   DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8312   SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8313
8314   if (Opcode != ISD::UREM) {
8315     // If we didn't shift LH/LL earlier, do it now.
8316     if (BestChunkWidth != HBitWidth)
8317       ShiftRight(LL, LH, TrailingZeros);
8318
8319     // Subtract the remainder from the shifted dividend.
8320     SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8321     SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8322
8323     Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8324
8325     // Multiply by the multiplicative inverse of the divisor modulo
8326     // (1 << BitWidth).
8327     APInt MulFactor = Divisor.multiplicativeInverse();
8328
8329     SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8330                                    DAG.getConstant(MulFactor, dl, VT));
8331
8332     // Split the quotient into low and high parts.
8333     SDValue QuotL, QuotH;
8334     std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8335     Result.push_back(QuotL);
8336     Result.push_back(QuotH);
8337   }
8338
8339   if (Opcode != ISD::UDIV) {
8340     // If we shifted the input, shift the remainder left and add the bits we
8341     // shifted off the input.
8342     if (TrailingZeros) {
8343       if (TrailingZeros < HBitWidth) {
8344         // Shift RemH:RemL left by TrailingZeros.
8345         // RemH gets the high bits shifted out of RemL.
8346         RemH = DAG.getNode(
8347             ISD::SRL, dl, HiLoVT, RemL,
8348             DAG.getShiftAmountConstant(HBitWidth - TrailingZeros, HiLoVT, dl));
8349         RemL =
8350             DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8351                         DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8352         // OR in the partial remainder.
8353         RemL = DAG.getNode(ISD::OR, dl, HiLoVT, RemL, PartialRemL,
8355       } else if (TrailingZeros == HBitWidth) {
8356         // Shift left by exactly HBitWidth: RemH becomes RemL, RemL becomes
8357         // PartialRemL.
8358         RemH = RemL;
8359         RemL = PartialRemL;
8360       } else {
8361         // Shift left by more than HBitWidth.
8362         RemH = DAG.getNode(
8363             ISD::SHL, dl, HiLoVT, RemL,
8364             DAG.getShiftAmountConstant(TrailingZeros - HBitWidth, HiLoVT, dl));
8365         RemH = DAG.getNode(ISD::OR, dl, HiLoVT, RemH, PartialRemH,
8367         RemL = PartialRemL;
8368       }
8369     }
8370     Result.push_back(RemL);
8371     Result.push_back(RemH);
8372   }
8373
8374   return true;
8375 }
8376
// Expand UDIV/UREM/UDIVREM of a double-wide value by a constant using the
// classic magic-number multiply algorithm (UnsignedDivisionByConstantInfo),
// operating on (lo, hi) half-value pairs of type HiLoVT. Returns false when
// the required MUL_LOHI expansion cannot be produced.
8377 bool TargetLowering::expandUDIVREMByConstantViaUMulHiMagic(
8378     SDNode *N, const APInt &Divisor, SmallVectorImpl<SDValue> &Result,
8379     EVT HiLoVT, SelectionDAG &DAG, SDValue LL, SDValue LH) const {
8380
8381   SDValue N0 = N->getOperand(0);
8382   EVT VT = N0->getValueType(0);
8383   SDLoc DL{N};
8384
8385   assert(!Divisor.isOne() && "Magic algorithm does not work for division by 1");
8386
8387   // This helper creates a MUL_LOHI of the pair (LL, LH) by a constant.
8388   auto MakeMUL_LOHIByConst = [&](unsigned Opc, SDValue LL, SDValue LH,
8389                                  const APInt &Const,
8390                                  SmallVectorImpl<SDValue> &Result) {
8391     SDValue LHS = DAG.getNode(ISD::BUILD_PAIR, DL, VT, LL, LH);
8392     SDValue RHS = DAG.getConstant(Const, DL, VT);
8393     auto [RL, RH] = DAG.SplitScalar(RHS, DL, HiLoVT, HiLoVT);
8394     return expandMUL_LOHI(Opc, VT, DL, LHS, RHS, Result, HiLoVT, DAG,
8396                           LL, LH, RL, RH);
8397   };
8398
8399   // This helper creates an ADD/SUB of the pairs (LL, LH) and (RL, RH).
// The low halves are combined with an overflow-producing node and the
// carry/borrow is threaded into the high-half combine.
8400   auto MakeAddSubLong = [&](unsigned Opc, SDValue LL, SDValue LH, SDValue RL,
8401                             SDValue RH) {
8402     SDValue AddSubNode =
8404                     DAG.getVTList(HiLoVT, MVT::i1), LL, RL);
8405     SDValue OutL = AddSubNode.getValue(0);
8406     SDValue Overflow = AddSubNode.getValue(1);
8407     SDValue AddSubWithOverflow =
8409                     DAG.getVTList(HiLoVT, MVT::i1), LH, RH, Overflow);
8410     SDValue OutH = AddSubWithOverflow.getValue(0);
8411     return std::make_pair(OutL, OutH);
8412   };
8413
8414   // This helper creates a SRL of the pair (LL, LH) by Shift.
8415   auto MakeSRLLong = [&](SDValue LL, SDValue LH, unsigned Shift) {
8416     unsigned HBitWidth = HiLoVT.getScalarSizeInBits();
8417     if (Shift < HBitWidth) {
8418       SDValue ShAmt = DAG.getShiftAmountConstant(Shift, HiLoVT, DL);
8419       SDValue ResL = DAG.getNode(ISD::FSHR, DL, HiLoVT, LH, LL, ShAmt);
8420       SDValue ResH = DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt);
8421       return std::make_pair(ResL, ResH);
8422     }
8423     SDValue Zero = DAG.getConstant(0, DL, HiLoVT);
8424     if (Shift == HBitWidth)
8425       return std::make_pair(LH, Zero);
8426     assert(Shift - HBitWidth < HBitWidth &&
8427            "We shouldn't generate an undefined shift");
8428     SDValue ShAmt = DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, DL);
8429     return std::make_pair(DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt), Zero);
8430   };
8431
8432   // Knowledge of leading zeros may help to reduce the multiplier.
8433   unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
8434
8435   UnsignedDivisionByConstantInfo Magics = UnsignedDivisionByConstantInfo::get(
8436       Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
8437
8438   assert(!LL == !LH && "Expected both input halves or no input halves!");
8439   if (!LL)
8440     std::tie(LL, LH) = DAG.SplitScalar(N0, DL, HiLoVT, HiLoVT);
8441   SDValue QL = LL;
8442   SDValue QH = LH;
8443   if (Magics.PreShift != 0)
8444     std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PreShift);
8445
8446   SmallVector<SDValue, 4> UMulResult;
8447   if (!MakeMUL_LOHIByConst(ISD::UMUL_LOHI, QL, QH, Magics.Magic, UMulResult))
8448     return false;
8449
// Entries 2 and 3 of the UMUL_LOHI expansion are the halves of the high part
// of the full product, i.e. the quotient estimate.
8450   QL = UMulResult[2];
8451   QH = UMulResult[3];
8452
// Fix-up for the "add" form of the magic algorithm:
// q = (((n - q) >> 1) + q) >> PostShift.
8453   if (Magics.IsAdd) {
8454     auto [NPQL, NPQH] = MakeAddSubLong(ISD::SUB, LL, LH, QL, QH);
8455     std::tie(NPQL, NPQH) = MakeSRLLong(NPQL, NPQH, 1);
8456     std::tie(QL, QH) = MakeAddSubLong(ISD::ADD, NPQL, NPQH, QL, QH);
8457   }
8458
8459   if (Magics.PostShift != 0)
8460     std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PostShift);
8461
8462   unsigned Opcode = N->getOpcode();
8463   if (Opcode != ISD::UREM) {
8464     Result.push_back(QL);
8465     Result.push_back(QH);
8466   }
8467
// Remainder = n - q * Divisor, computed on half pairs.
8468   if (Opcode != ISD::UDIV) {
8469     SmallVector<SDValue, 2> MulResult;
8470     if (!MakeMUL_LOHIByConst(ISD::MUL, QL, QH, Divisor, MulResult))
8471       return false;
8472
8473     assert(MulResult.size() == 2);
8474
8475     auto [RemL, RemH] =
8476         MakeAddSubLong(ISD::SUB, LL, LH, MulResult[0], MulResult[1]);
8477
8478     Result.push_back(RemL);
8479     Result.push_back(RemH);
8480   }
8481
8482   return true;
8483 }
8484
// Driver for expanding an unsigned double-wide division/remainder by a
// constant without a libcall: first tries the remainder-by-summing-digits
// decomposition, then the magic-multiply path. Signed opcodes are rejected.
// NOTE(review): the opening line(s) of this signature (upstream 8485-8486)
// are not visible in this listing extract.
8487                                             EVT HiLoVT, SelectionDAG &DAG,
8488                                             SDValue LL, SDValue LH) const {
8489   unsigned Opcode = N->getOpcode();
8490
8491   // TODO: Support signed division/remainder.
8492   if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8493     return false;
8494   assert(
8495       (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8496       "Unexpected opcode");
8497
// Only constant divisors are handled; the algorithms precompute values from
// the divisor at compile time.
8498   auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8499   if (!CN)
8500     return false;
8501
8502   APInt Divisor = CN->getAPIntValue();
8503
8504   // We depend on the UREM by constant optimization in DAGCombiner that requires
8505   // high multiply.
8506   if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8508     return false;
8509
8510   // Don't expand if optimizing for size.
8511   if (DAG.shouldOptForSize())
8512     return false;
8513
8514   // Early out for 0 or 1 divisors.
8515   if (Divisor.ule(1))
8516     return false;
8517
8518   if (expandUDIVREMByConstantViaUREMDecomposition(N, Divisor, Result, HiLoVT,
8519                                                   DAG, LL, LH))
8520     return true;
8521
8522   if (expandUDIVREMByConstantViaUMulHiMagic(N, Divisor, Result, HiLoVT, DAG, LL,
8523                                             LH))
8524     return true;
8525
8526   return false;
8527 }
8528
8529 // Check that (every element of) Z is undef or not an exact multiple of BW.
8530 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
// Predicate applied per element; a null ConstantSDNode presumably denotes an
// undef element (AllowUndefs), which is accepted.
8532       Z,
8533       [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8534       /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8535 }
8536
// Expand a vector-predicated funnel shift (VP_FSHL/VP_FSHR) into VP shifts,
// masks and a final VP_OR, threading the mask and EVL operands through every
// intermediate node. NOTE(review): the opening signature line (upstream
// 8537) is not visible in this listing extract.
8538   EVT VT = Node->getValueType(0);
8539   SDValue ShX, ShY;
8540   SDValue ShAmt, InvShAmt;
8541   SDValue X = Node->getOperand(0);
8542   SDValue Y = Node->getOperand(1);
8543   SDValue Z = Node->getOperand(2);
8544   SDValue Mask = Node->getOperand(3);
8545   SDValue VL = Node->getOperand(4);
8546
8547   unsigned BW = VT.getScalarSizeInBits();
8548   bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8549   SDLoc DL(SDValue(Node, 0));
8550
8551   EVT ShVT = Z.getValueType();
8552   if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8553     // fshl: X << C | Y >> (BW - C)
8554     // fshr: X << (BW - C) | Y >> C
8555     // where C = Z % BW is not zero
8556     SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8557     ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8558     InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8559     ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8560                       VL);
8561     ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8562                       VL);
8563   } else {
8564     // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8565     // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
// Splitting the second shift as ">> 1 >> (BW - 1 - C)" keeps every shift
// amount strictly less than BW even when C == 0.
8566     SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8567     if (isPowerOf2_32(BW)) {
8568       // Z % BW -> Z & (BW - 1)
8569       ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8570       // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8571       SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8572                                  DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8573       InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8574     } else {
8575       SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8576       ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8577       InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8578     }
8579
8580     SDValue One = DAG.getConstant(1, DL, ShVT);
8581     if (IsFSHL) {
8582       ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8583       SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8584       ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8585     } else {
8586       SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8587       ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8588       ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8589     }
8590   }
8591   return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8592 }
8593
// Expand FSHL/FSHR into SHL/SRL/OR, or forward to the opposite funnel shift
// when that one is better supported. Returns an empty SDValue when the
// required vector shift operations are unavailable. VP nodes are delegated
// to expandVPFunnelShift. NOTE(review): the opening signature line (upstream
// 8594) is not visible in this listing extract.
8595                                            SelectionDAG &DAG) const {
8596   if (Node->isVPOpcode())
8597     return expandVPFunnelShift(Node, DAG);
8598
8599   EVT VT = Node->getValueType(0);
8600
// Bail out for vectors when the shifts/OR needed below are not available
// (part of this legality check is not visible in this listing).
8601   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8605     return SDValue();
8606
8607   SDValue X = Node->getOperand(0);
8608   SDValue Y = Node->getOperand(1);
8609   SDValue Z = Node->getOperand(2);
8610
8611   unsigned BW = VT.getScalarSizeInBits();
8612   bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8613   SDLoc DL(SDValue(Node, 0));
8614
8615   EVT ShVT = Z.getValueType();
8616
8617   // If a funnel shift in the other direction is more supported, use it.
8618   unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8619   if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8620       isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8621     if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8622       // fshl X, Y, Z -> fshr X, Y, -Z
8623       // fshr X, Y, Z -> fshl X, Y, -Z
8624       Z = DAG.getNegative(Z, DL, ShVT);
8625     } else {
8626       // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8627       // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
// Pre-shifting by one lets ~Z (= BW-1-Z mod BW) act as the remaining shift,
// which stays in range even when Z % BW == 0.
8628       SDValue One = DAG.getConstant(1, DL, ShVT);
8629       if (IsFSHL) {
8630         Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8631         X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8632       } else {
8633         X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8634         Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8635       }
8636       Z = DAG.getNOT(DL, Z, ShVT);
8637     }
8638     return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8639   }
8640
8641   SDValue ShX, ShY;
8642   SDValue ShAmt, InvShAmt;
8643   if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8644     // fshl: X << C | Y >> (BW - C)
8645     // fshr: X << (BW - C) | Y >> C
8646     // where C = Z % BW is not zero
8647     SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8648     ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8649     InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8650     ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8651     ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8652   } else {
8653     // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8654     // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8655     SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8656     if (isPowerOf2_32(BW)) {
8657       // Z % BW -> Z & (BW - 1)
8658       ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8659       // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8660       InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8661     } else {
8662       SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8663       ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8664       InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8665     }
8666
8667     SDValue One = DAG.getConstant(1, DL, ShVT);
8668     if (IsFSHL) {
8669       ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8670       SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8671       ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8672     } else {
8673       SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8674       ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8675       ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8676     }
8677   }
8678   return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8679 }
8680
8681 // TODO: Merge with expandFunnelShift.
// Expand ROTL/ROTR into a pair of shifts and an OR; prefers the opposite
// rotate when it is legal and the bit width is a power of two. NOTE(review):
// the opening signature line (upstream 8682) is not visible in this extract.
8683                                SelectionDAG &DAG) const {
8684   EVT VT = Node->getValueType(0);
8685   unsigned EltSizeInBits = VT.getScalarSizeInBits();
8686   bool IsLeft = Node->getOpcode() == ISD::ROTL;
8687   SDValue Op0 = Node->getOperand(0);
8688   SDValue Op1 = Node->getOperand(1);
8689   SDLoc DL(SDValue(Node, 0));
8690
8691   EVT ShVT = Op1.getValueType();
8692   SDValue Zero = DAG.getConstant(0, DL, ShVT);
8693
8694   // If a rotate in the other direction is more supported, use it.
8695   unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8696   if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8697       isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
// rot(x, c) == rev-rot(x, -c) when the width is a power of two.
8698     SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8699     return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8700   }
8701
// Bail out for vectors when the shift/and/or ops required below are not
// available (part of this legality check is not visible in this listing).
8702   if (!AllowVectorOps && VT.isVector() &&
8708     return SDValue();
8709
8710   unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8711   unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8712   SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8713   SDValue ShVal;
8714   SDValue HsVal;
8715   if (isPowerOf2_32(EltSizeInBits)) {
8716     // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8717     // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8718     SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8719     SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8720     ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8721     SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8722     HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8723   } else {
8724     // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8725     // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
// The split ">> 1 >> (w - 1 - c)" keeps shift amounts in range when c == 0.
8726     SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8727     SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8728     ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8729     SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8730     SDValue One = DAG.getConstant(1, DL, ShVT);
8731     HsVal =
8732         DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8733   }
8734   return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8735 }
8736
8737 /// Check if CLMUL on VT can eventually reach a type with legal CLMUL through
8738 /// a chain of halving decompositions (halving element width) and/or vector
8739 /// widening (doubling element count). This guides expansion strategy selection:
8740 /// if true, the halving/widening path produces better code than bit-by-bit.
8741 ///
8742 /// HalveDepth tracks halving steps only (each creates ~4x more operations).
8743 /// Widening steps are cheap (O(1) pad/extract) and don't count.
8744 /// Limiting halvings to 2 prevents exponential blowup:
8745 /// 1 halving: ~4 sub-CLMULs (good, e.g. v8i16 -> v8i8)
8746 /// 2 halvings: ~16 sub-CLMULs (acceptable, e.g. v4i32 -> v4i16 -> v8i8)
8747 /// 3 halvings: ~64 sub-CLMULs (worse than bit-by-bit expansion)
// NOTE(review): the opening line of this static function's signature
// (upstream 8748) is not visible in this listing extract.
8749                                   EVT VT, unsigned HalveDepth = 0,
8750                                   unsigned TotalDepth = 0) {
// TotalDepth caps the combined halve+widen search to avoid long chains.
8751   if (HalveDepth > 2 || TotalDepth > 8 || !VT.isFixedLengthVector())
8752     return false;
8754     return true;
8755   if (!TLI.isTypeLegal(VT))
8756     return false;
8757
8758   unsigned BW = VT.getScalarSizeInBits();
8759
8760   // Halve: halve element width, same element count.
8761   // This is the expensive step -- each halving creates ~4x more operations.
8762   if (BW % 2 == 0) {
8763     EVT HalfEltVT = EVT::getIntegerVT(Ctx, BW / 2);
8764     EVT HalfVT = VT.changeVectorElementType(Ctx, HalfEltVT);
8765     if (TLI.isTypeLegal(HalfVT) &&
8766         canNarrowCLMULToLegal(TLI, Ctx, HalfVT, HalveDepth + 1, TotalDepth + 1))
8767       return true;
8768   }
8769
8770   // Widen: double element count (fixed-width vectors only).
8771   // This is cheap -- just INSERT_SUBVECTOR + EXTRACT_SUBVECTOR.
8772   EVT WideVT = VT.getDoubleNumVectorElementsVT(Ctx);
8773   if (TLI.isTypeLegal(WideVT) &&
8774       canNarrowCLMULToLegal(TLI, Ctx, WideVT, HalveDepth, TotalDepth + 1))
8775     return true;
8776
8777   return false;
8778 }
8779
// Expand a carry-less multiply node (CLMUL / CLMULR / CLMULH). Vector CLMUL
// first tries element-width halving, element-width promotion, or vector
// widening to reach a legal CLMUL; otherwise falls back to a bit-by-bit
// shift/XOR expansion. CLMULR/CLMULH are built from CLMUL/CLMULH pairs, a
// bitreverse trick, or a zero-extended wide CLMUL. NOTE(review): the opening
// signature line (upstream 8780) is not visible in this listing extract.
8781   SDLoc DL(Node);
8782   EVT VT = Node->getValueType(0);
8783   SDValue X = Node->getOperand(0);
8784   SDValue Y = Node->getOperand(1);
8785   unsigned BW = VT.getScalarSizeInBits();
8786   unsigned Opcode = Node->getOpcode();
8787   LLVMContext &Ctx = *DAG.getContext();
8788
8789   switch (Opcode) {
8790   case ISD::CLMUL: {
8791     // For vector types, try decomposition strategies that leverage legal
8792     // CLMUL on narrower or wider element types, avoiding the expensive
8793     // bit-by-bit expansion.
8794     if (VT.isVector()) {
8795       // Strategy 1: Halving decomposition to half-element-width CLMUL.
8796       // Applies ExpandIntRes_CLMUL's identity element-wise:
8797       //   CLMUL(X, Y) = (Hi << HalfBW) | Lo
8798       // where:
8799       //   Lo = CLMUL(XLo, YLo)
8800       //   Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8801       unsigned HalfBW = BW / 2;
8802       if (BW % 2 == 0) {
8803         EVT HalfEltVT = EVT::getIntegerVT(Ctx, HalfBW);
8804         EVT HalfVT =
8805             EVT::getVectorVT(Ctx, HalfEltVT, VT.getVectorElementCount());
8806         if (isTypeLegal(HalfVT) && canNarrowCLMULToLegal(*this, Ctx, HalfVT,
8807                                                          /*HalveDepth=*/1)) {
8808           SDValue ShAmt = DAG.getShiftAmountConstant(HalfBW, VT, DL);
8809
8810           // Extract low and high halves of each element.
8811           SDValue XLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, X);
8812           SDValue XHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8813                                     DAG.getNode(ISD::SRL, DL, VT, X, ShAmt));
8814           SDValue YLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, Y);
8815           SDValue YHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8816                                     DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt));
8817
8818           // Lo = CLMUL(XLo, YLo)
8819           SDValue Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YLo);
8820
8821           // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8822           SDValue LoH = DAG.getNode(ISD::CLMULH, DL, HalfVT, XLo, YLo);
8823           SDValue Cross1 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YHi);
8824           SDValue Cross2 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XHi, YLo);
8825           SDValue Cross = DAG.getNode(ISD::XOR, DL, HalfVT, Cross1, Cross2);
8826           SDValue Hi = DAG.getNode(ISD::XOR, DL, HalfVT, LoH, Cross);
8827
8828           // Reassemble: Result = ZExt(Lo) | (AnyExt(Hi) << HalfBW)
// ANY_EXTEND is fine for Hi: its garbage high bits are shifted out by SHL.
8829           SDValue LoExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo);
8830           SDValue HiExt = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Hi);
8831           SDValue HiShifted = DAG.getNode(ISD::SHL, DL, VT, HiExt, ShAmt);
8832           return DAG.getNode(ISD::OR, DL, VT, LoExt, HiShifted);
8833         }
8834       }
8835
8836       // Strategy 2: Promote to double-element-width CLMUL.
8837       //   CLMUL(X, Y) = Trunc(CLMUL(AnyExt(X), AnyExt(Y)))
8838       {
8839         EVT ExtVT = VT.widenIntegerElementType(Ctx);
8840         if (isTypeLegal(ExtVT) && isOperationLegalOrCustom(ISD::CLMUL, ExtVT)) {
8841           // If CLMUL on ExtVT is Custom (not Legal), the target may
8842           // scalarize it, costing O(NumElements) scalar ops. The bit-by-bit
8843           // fallback costs O(BW) vectorized iterations. Only widen when
8844           // element count is small enough that scalarization is cheaper.
8845           unsigned NumElts = VT.getVectorMinNumElements();
8846           if (isOperationLegal(ISD::CLMUL, ExtVT) || NumElts < BW) {
8847             SDValue XExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, X);
8848             SDValue YExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, Y);
8849             SDValue Mul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8850             return DAG.getNode(ISD::TRUNCATE, DL, VT, Mul);
8851           }
8852         }
8853       }
8854
8855       // Strategy 3: Widen element count (pad with undef, do CLMUL on wider
8856       // vector, extract lower result). CLMUL is element-wise, so upper
8857       // (undef) lanes don't affect the lower results.
8858       // e.g. v4i16 => pad to v8i16 => halve to v8i8 PMUL => extract v4i16.
8859       if (auto EC = VT.getVectorElementCount(); EC.isFixed()) {
8860         EVT WideVT = EVT::getVectorVT(Ctx, VT.getVectorElementType(), EC * 2);
8861         if (isTypeLegal(WideVT) && canNarrowCLMULToLegal(*this, Ctx, WideVT)) {
8862           SDValue Undef = DAG.getUNDEF(WideVT);
8863           SDValue XWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8864                                       X, DAG.getVectorIdxConstant(0, DL));
8865           SDValue YWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8866                                       Y, DAG.getVectorIdxConstant(0, DL));
8867           SDValue WideRes = DAG.getNode(ISD::CLMUL, DL, WideVT, XWide, YWide);
8868           return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, WideRes,
8869                              DAG.getVectorIdxConstant(0, DL));
8870         }
8871       }
8872     }
8873
8874     // NOTE: If you change this expansion, please update the cost model
8875     // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
8876     // Intrinsic::clmul.
8877
8878     EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
8879
// Bit-by-bit fallback: accumulate Res ^= (Y has bit I) ? (X << I) : 0.
8880     SDValue Res = DAG.getConstant(0, DL, VT);
8881     for (unsigned I = 0; I < BW; ++I) {
8882       SDValue ShiftAmt = DAG.getShiftAmountConstant(I, VT, DL);
8883       SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
8884       SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
8885
8886       // For targets with a fast bit test instruction (e.g., x86 BT) or without
8887       // multiply, use a shift-based expansion to avoid expensive MUL
8888       // instructions.
8889       SDValue Part;
8890       if (!hasBitTest(Y, ShiftAmt) &&
// X * (Y & (1 << I)) equals (Y bit I) ? X << I : 0 for a single-bit mask.
8893         Part = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
8894       } else {
8895         // Canonical bit test: (Y & (1 << I)) != 0
8896         SDValue Zero = DAG.getConstant(0, DL, VT);
8897         SDValue Cond = DAG.getSetCC(DL, SetCCVT, YMasked, Zero, ISD::SETEQ);
8898         SDValue XShifted = DAG.getNode(ISD::SHL, DL, VT, X, ShiftAmt);
8899         Part = DAG.getSelect(DL, VT, Cond, Zero, XShifted);
8900       }
8901       Res = DAG.getNode(ISD::XOR, DL, VT, Res, Part);
8902     }
8903     return Res;
8904   }
8905   case ISD::CLMULR:
8906     // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
8909       SDValue Lo = DAG.getNode(ISD::CLMUL, DL, VT, X, Y);
8910       SDValue Hi = DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
8911       Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
8912                        DAG.getShiftAmountConstant(BW - 1, VT, DL));
8913       Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8914                        DAG.getShiftAmountConstant(1, VT, DL));
8915       return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
8916     }
8917     [[fallthrough]];
8918   case ISD::CLMULH: {
8919     EVT ExtVT = VT.widenIntegerElementType(Ctx);
8920     // Use bitreverse-based lowering (CLMULR/H = rev(CLMUL(rev,rev)) >> S)
8921     // when any of these hold:
8922     // (a) ZERO_EXTEND to ExtVT or SRL on ExtVT isn't legal.
8923     // (b) CLMUL is legal on VT but not on ExtVT (e.g. v8i8 on AArch64).
8924     // (c) CLMUL on ExtVT isn't legal, but CLMUL on VT can be efficiently
8925     //     expanded via halving/widening to reach legal CLMUL. The bitreverse
8926     //     path creates CLMUL(VT) which will be expanded efficiently. The
8927     //     promote path would create CLMUL(ExtVT) => halving => CLMULH(VT),
8928     //     causing a cycle.
8929     // Note: when CLMUL is legal on ExtVT, the zext => CLMUL(ExtVT) => shift
8930     // => trunc path is preferred over the bitreverse path, as it avoids the
8931     // cost of 3 bitreverse operations.
8936           canNarrowCLMULToLegal(*this, Ctx, VT)))) {
8937       SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
8938       SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
8939       SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
8940       SDValue Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
8941       if (Opcode == ISD::CLMULH)
8942         Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8943                           DAG.getShiftAmountConstant(1, VT, DL));
8944       return Res;
8945     }
// Fallback: widen, CLMUL at double width, then take the high bits
// (shift by BW-1 for CLMULR, by BW for CLMULH) and truncate.
8946     SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
8947     SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
8948     SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8949     unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
8950     SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
8951                                  DAG.getShiftAmountConstant(ShAmt, ExtVT, DL));
8952     return DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
8953   }
8954   }
8955   llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
8956 }
8957
8959 SelectionDAG &DAG) const {
8960 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8961 EVT VT = Node->getValueType(0);
8962 unsigned VTBits = VT.getScalarSizeInBits();
8963 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8964
8965 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8966 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8967 SDValue ShOpLo = Node->getOperand(0);
8968 SDValue ShOpHi = Node->getOperand(1);
8969 SDValue ShAmt = Node->getOperand(2);
8970 EVT ShAmtVT = ShAmt.getValueType();
8971 EVT ShAmtCCVT =
8972 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8973 SDLoc dl(Node);
8974
8975 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8976 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8977 // away during isel.
8978 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8979 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8980 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8981 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8982 : DAG.getConstant(0, dl, VT);
8983
8984 SDValue Tmp2, Tmp3;
8985 if (IsSHL) {
8986 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8987 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8988 } else {
8989 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8990 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8991 }
8992
8993 // If the shift amount is larger or equal than the width of a part we don't
8994 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8995 // values for large shift amounts.
8996 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8997 DAG.getConstant(VTBits, dl, ShAmtVT));
8998 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8999 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
9000
9001 if (IsSHL) {
9002 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
9003 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
9004 } else {
9005 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
9006 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
9007 }
9008}
9009
9011 SelectionDAG &DAG) const {
9012 // This implements llvm.canonicalize.f* by multiplication with 1.0, as
9013 // suggested in
9014 // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
9015 // It uses strict_fp operations even outside a strict_fp context in order
9016 // to guarantee that the canonicalization is not optimized away by later
9017 // passes. The result chain introduced by that is intentionally ignored
9018 // since no ordering requirement is intended here.
9019 EVT VT = Node->getValueType(0);
9020 SDLoc DL(Node);
9021 SDNodeFlags Flags = Node->getFlags();
9022 Flags.setNoFPExcept(true);
9023 SDValue One = DAG.getConstantFP(1.0, DL, VT);
9024 SDValue Mul =
9025 DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
9026 {DAG.getEntryNode(), Node->getOperand(0), One}, Flags);
9027 return Mul;
9028}
9029
9030SDValue
9032 SelectionDAG &DAG) const {
9033 SDLoc dl(Node);
9034 EVT DstVT = Node->getValueType(0);
9035 EVT DstScalarVT = DstVT.getScalarType();
9036
9037 SDValue IntVal = Node->getOperand(0);
9038 const uint64_t SemEnum = Node->getConstantOperandVal(1);
9039 const auto Sem = static_cast<APFloatBase::Semantics>(SemEnum);
9040
9041 // Supported source formats.
9042 switch (Sem) {
9048 break;
9049 default:
9050 DAG.getContext()->emitError("CONVERT_FROM_ARBITRARY_FP: not implemented "
9051 "source format (semantics enum " +
9052 Twine(SemEnum) + ")");
9053 return SDValue();
9054 }
9055
9056 const fltSemantics &SrcSem = APFloatBase::EnumToSemantics(Sem);
9057 const unsigned SrcBits = APFloat::getSizeInBits(SrcSem);
9058 const unsigned SrcPrecision = APFloat::semanticsPrecision(SrcSem);
9059 const unsigned SrcMant = SrcPrecision - 1;
9060 const unsigned SrcExp = SrcBits - SrcMant - 1;
9061 const int SrcBias = 1 - APFloat::semanticsMinExponent(SrcSem);
9062 const fltNonfiniteBehavior NFBehavior = SrcSem.nonFiniteBehavior;
9063
9064 // Destination format parameters.
9065 const fltSemantics &DstSem = DstScalarVT.getFltSemantics();
9066 const unsigned DstBits = APFloat::getSizeInBits(DstSem);
9067 const unsigned DstMant = APFloat::semanticsPrecision(DstSem) - 1;
9068 const unsigned DstExpBits = DstBits - DstMant - 1;
9069 const int DstMinExp = APFloat::semanticsMinExponent(DstSem);
9070 const int DstBias = 1 - DstMinExp;
9071 const uint64_t DstExpAllOnes = (1ULL << DstExpBits) - 1;
9072
9073 // Work in an integer type matching the destination float width.
9074 EVT IntScalarVT = EVT::getIntegerVT(*DAG.getContext(), DstBits);
9075 EVT IntVT = DstVT.isVector()
9076 ? EVT::getVectorVT(*DAG.getContext(), IntScalarVT,
9077 DstVT.getVectorElementCount())
9078 : IntScalarVT;
9079
9080 SDValue Src = DAG.getZExtOrTrunc(IntVal, dl, IntVT);
9081
9082 EVT SetCCVT =
9083 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), IntVT);
9084
9085 SDValue Zero = DAG.getConstant(0, dl, IntVT);
9086 SDValue One = DAG.getConstant(1, dl, IntVT);
9087
9088 // Extract bit fields.
9089 const uint64_t MantMask = (SrcMant > 0) ? ((1ULL << SrcMant) - 1) : 0;
9090 const uint64_t ExpMask = (1ULL << SrcExp) - 1;
9091
9092 SDValue MantField = DAG.getNode(ISD::AND, dl, IntVT, Src,
9093 DAG.getConstant(MantMask, dl, IntVT));
9094
9095 SDValue ExpField =
9096 DAG.getNode(ISD::AND, dl, IntVT,
9097 DAG.getNode(ISD::SRL, dl, IntVT, Src,
9098 DAG.getShiftAmountConstant(SrcMant, IntVT, dl)),
9099 DAG.getConstant(ExpMask, dl, IntVT));
9100
9101 SDValue SignBit =
9102 DAG.getNode(ISD::SRL, dl, IntVT, Src,
9103 DAG.getShiftAmountConstant(SrcBits - 1, IntVT, dl));
9104
9105 SDValue SignShifted =
9106 DAG.getNode(ISD::SHL, dl, IntVT, SignBit,
9107 DAG.getShiftAmountConstant(DstBits - 1, IntVT, dl));
9108
9109 // Classify the input.
9110 SDValue ExpAllOnes = DAG.getConstant(ExpMask, dl, IntVT);
9111 SDValue IsExpAllOnes =
9112 DAG.getSetCC(dl, SetCCVT, ExpField, ExpAllOnes, ISD::SETEQ);
9113 SDValue IsExpZero = DAG.getSetCC(dl, SetCCVT, ExpField, Zero, ISD::SETEQ);
9114 SDValue IsMantZero = DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETEQ);
9115 SDValue IsMantNonZero =
9116 DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETNE);
9117
9118 SDValue IsNaN;
9119 if (NFBehavior == fltNonfiniteBehavior::FiniteOnly) {
9120 IsNaN = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
9121 } else if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
9122 IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantNonZero);
9123 } else {
9125 SDValue MantAllOnes = DAG.getConstant(MantMask, dl, IntVT);
9126 SDValue IsMantAllOnes =
9127 DAG.getSetCC(dl, SetCCVT, MantField, MantAllOnes, ISD::SETEQ);
9128 IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantAllOnes);
9129 }
9130
9131 SDValue IsInf;
9132 if (NFBehavior == fltNonfiniteBehavior::IEEE754)
9133 IsInf = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantZero);
9134 else
9135 IsInf = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
9136
9137 SDValue IsZero = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantZero);
9138 SDValue IsDenorm =
9139 DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantNonZero);
9140
9141 // Normal value conversion.
9142 const int BiasAdjust = DstBias - SrcBias;
9143 SDValue NormDstExp =
9144 DAG.getNode(ISD::ADD, dl, IntVT, ExpField,
9145 DAG.getConstant(APInt(DstBits, BiasAdjust, true), dl, IntVT));
9146
9147 SDValue NormDstMant;
9148 if (DstMant > SrcMant) {
9149 SDValue NormDstMantShift =
9150 DAG.getShiftAmountConstant(DstMant - SrcMant, IntVT, dl);
9151 NormDstMant = DAG.getNode(ISD::SHL, dl, IntVT, MantField, NormDstMantShift);
9152 } else {
9153 NormDstMant = MantField;
9154 }
9155
9156 SDValue DstMantShift = DAG.getShiftAmountConstant(DstMant, IntVT, dl);
9157 SDValue NormExpShifted =
9158 DAG.getNode(ISD::SHL, dl, IntVT, NormDstExp, DstMantShift);
9159 SDValue NormResult =
9160 DAG.getNode(ISD::OR, dl, IntVT,
9161 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, NormExpShifted),
9162 NormDstMant);
9163
9164 // Denormal value conversion.
9165 SDValue DenormResult;
9166 {
9167 const unsigned IntVTBits = DstBits;
9168 SDValue LeadingZeros =
9169 DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, IntVT, MantField);
9170
9171 const int DenormExpConst =
9172 (int)IntVTBits + DstBias - SrcBias - (int)SrcMant;
9173 SDValue DenormDstExp = DAG.getNode(
9174 ISD::SUB, dl, IntVT,
9175 DAG.getConstant(APInt(DstBits, DenormExpConst, true), dl, IntVT),
9176 LeadingZeros);
9177
9178 SDValue MantMSB =
9179 DAG.getNode(ISD::SUB, dl, IntVT,
9180 DAG.getConstant(IntVTBits - 1, dl, IntVT), LeadingZeros);
9181
9182 SDValue LeadingOne = DAG.getNode(ISD::SHL, dl, IntVT, One, MantMSB);
9183 SDValue Frac = DAG.getNode(ISD::XOR, dl, IntVT, MantField, LeadingOne);
9184
9185 const unsigned ShiftSub = IntVTBits - 1 - DstMant;
9186 SDValue ShiftAmount = DAG.getNode(ISD::SUB, dl, IntVT, LeadingZeros,
9187 DAG.getConstant(ShiftSub, dl, IntVT));
9188
9189 SDValue DenormDstMant = DAG.getNode(ISD::SHL, dl, IntVT, Frac, ShiftAmount);
9190
9191 SDValue DenormExpShifted =
9192 DAG.getNode(ISD::SHL, dl, IntVT, DenormDstExp, DstMantShift);
9193 DenormResult = DAG.getNode(
9194 ISD::OR, dl, IntVT,
9195 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, DenormExpShifted),
9196 DenormDstMant);
9197 }
9198
9199 SDValue FiniteResult =
9200 DAG.getSelect(dl, IntVT, IsDenorm, DenormResult, NormResult);
9201
9202 const uint64_t QNaNBit = (DstMant > 0) ? (1ULL << (DstMant - 1)) : 0;
9203 SDValue NaNResult =
9204 DAG.getConstant((DstExpAllOnes << DstMant) | QNaNBit, dl, IntVT);
9205
9206 SDValue InfResult =
9207 DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
9208 DAG.getConstant(DstExpAllOnes << DstMant, dl, IntVT));
9209
9210 SDValue ZeroResult = SignShifted;
9211
9212 SDValue Result = FiniteResult;
9213 Result = DAG.getSelect(dl, IntVT, IsZero, ZeroResult, Result);
9214 Result = DAG.getSelect(dl, IntVT, IsInf, InfResult, Result);
9215 Result = DAG.getSelect(dl, IntVT, IsNaN, NaNResult, Result);
9216
9217 return DAG.getNode(ISD::BITCAST, dl, DstVT, Result);
9218}
9219
9221 SelectionDAG &DAG) const {
9222 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
9223 SDValue Src = Node->getOperand(OpNo);
9224 EVT SrcVT = Src.getValueType();
9225 EVT DstVT = Node->getValueType(0);
9226 SDLoc dl(SDValue(Node, 0));
9227
9228 // FIXME: Only f32 to i64 conversions are supported.
9229 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
9230 return false;
9231
9232 if (Node->isStrictFPOpcode())
9233 // When a NaN is converted to an integer a trap is allowed. We can't
9234 // use this expansion here because it would eliminate that trap. Other
9235 // traps are also allowed and cannot be eliminated. See
9236 // IEEE 754-2008 sec 5.8.
9237 return false;
9238
9239 // Expand f32 -> i64 conversion
9240 // This algorithm comes from compiler-rt's implementation of fixsfdi:
9241 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
9242 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
9243 EVT IntVT = SrcVT.changeTypeToInteger();
9244 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
9245
9246 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
9247 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
9248 SDValue Bias = DAG.getConstant(127, dl, IntVT);
9249 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
9250 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
9251 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
9252
9253 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
9254
9255 SDValue ExponentBits = DAG.getNode(
9256 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
9257 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
9258 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
9259
9260 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
9261 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
9262 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
9263 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
9264
9265 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
9266 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
9267 DAG.getConstant(0x00800000, dl, IntVT));
9268
9269 R = DAG.getZExtOrTrunc(R, dl, DstVT);
9270
9271 R = DAG.getSelectCC(
9272 dl, Exponent, ExponentLoBit,
9273 DAG.getNode(ISD::SHL, dl, DstVT, R,
9274 DAG.getZExtOrTrunc(
9275 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
9276 dl, IntShVT)),
9277 DAG.getNode(ISD::SRL, dl, DstVT, R,
9278 DAG.getZExtOrTrunc(
9279 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
9280 dl, IntShVT)),
9281 ISD::SETGT);
9282
9283 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
9284 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
9285
9286 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
9287 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
9288 return true;
9289}
9290
9292 SDValue &Chain,
9293 SelectionDAG &DAG) const {
9294 SDLoc dl(SDValue(Node, 0));
9295 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
9296 SDValue Src = Node->getOperand(OpNo);
9297
9298 EVT SrcVT = Src.getValueType();
9299 EVT DstVT = Node->getValueType(0);
9300 EVT SetCCVT =
9301 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
9302 EVT DstSetCCVT =
9303 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
9304
9305 // Only expand vector types if we have the appropriate vector bit operations.
9306 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
9308 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
9310 return false;
9311
9312 // If the maximum float value is smaller then the signed integer range,
9313 // the destination signmask can't be represented by the float, so we can
9314 // just use FP_TO_SINT directly.
9315 const fltSemantics &APFSem = SrcVT.getFltSemantics();
9316 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
9317 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
9319 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
9320 if (Node->isStrictFPOpcode()) {
9321 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
9322 { Node->getOperand(0), Src });
9323 Chain = Result.getValue(1);
9324 } else
9325 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
9326 return true;
9327 }
9328
9329 // Don't expand it if there isn't cheap fsub instruction.
9331 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
9332 return false;
9333
9334 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
9335 SDValue Sel;
9336
9337 if (Node->isStrictFPOpcode()) {
9338 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
9339 Node->getOperand(0), /*IsSignaling*/ true);
9340 Chain = Sel.getValue(1);
9341 } else {
9342 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
9343 }
9344
9345 bool Strict = Node->isStrictFPOpcode() ||
9346 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
9347
9348 if (Strict) {
9349 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
9350 // signmask then offset (the result of which should be fully representable).
9351 // Sel = Src < 0x8000000000000000
9352 // FltOfs = select Sel, 0, 0x8000000000000000
9353 // IntOfs = select Sel, 0, 0x8000000000000000
9354 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
9355
9356 // TODO: Should any fast-math-flags be set for the FSUB?
9357 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
9358 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
9359 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9360 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
9361 DAG.getConstant(0, dl, DstVT),
9362 DAG.getConstant(SignMask, dl, DstVT));
9363 SDValue SInt;
9364 if (Node->isStrictFPOpcode()) {
9365 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
9366 { Chain, Src, FltOfs });
9367 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
9368 { Val.getValue(1), Val });
9369 Chain = SInt.getValue(1);
9370 } else {
9371 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
9372 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
9373 }
9374 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
9375 } else {
9376 // Expand based on maximum range of FP_TO_SINT:
9377 // True = fp_to_sint(Src)
9378 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
9379 // Result = select (Src < 0x8000000000000000), True, False
9380
9381 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
9382 // TODO: Should any fast-math-flags be set for the FSUB?
9383 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
9384 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
9385 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
9386 DAG.getConstant(SignMask, dl, DstVT));
9387 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9388 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
9389 }
9390 return true;
9391}
9392
9394 SDValue &Chain, SelectionDAG &DAG) const {
9395 // This transform is not correct for converting 0 when rounding mode is set
9396 // to round toward negative infinity which will produce -0.0. So disable
9397 // under strictfp.
9398 if (Node->isStrictFPOpcode())
9399 return false;
9400
9401 SDValue Src = Node->getOperand(0);
9402 EVT SrcVT = Src.getValueType();
9403 EVT DstVT = Node->getValueType(0);
9404
9405 // If the input is known to be non-negative and SINT_TO_FP is legal then use
9406 // it.
9407 if (Node->getFlags().hasNonNeg() &&
9409 Result =
9410 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
9411 return true;
9412 }
9413
9414 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
9415 return false;
9416
9417 // Only expand vector types if we have the appropriate vector bit
9418 // operations.
9419 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
9424 return false;
9425
9426 SDLoc dl(SDValue(Node, 0));
9427
9428 // Implementation of unsigned i64 to f64 following the algorithm in
9429 // __floatundidf in compiler_rt. This implementation performs rounding
9430 // correctly in all rounding modes with the exception of converting 0
9431 // when rounding toward negative infinity. In that case the fsub will
9432 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
9433 // incorrect.
9434 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
9435 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
9436 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
9437 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
9438 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
9439 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
9440
9441 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
9442 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
9443 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
9444 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
9445 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
9446 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
9447 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
9448 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
9449 return true;
9450}
9451
9452SDValue
9454 SelectionDAG &DAG) const {
9455 unsigned Opcode = Node->getOpcode();
9456 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
9457 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
9458 "Wrong opcode");
9459
9460 if (Node->getFlags().hasNoNaNs()) {
9461 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
9462 EVT VT = Node->getValueType(0);
9463 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
9465 VT.isVector())
9466 return SDValue();
9467 SDValue Op1 = Node->getOperand(0);
9468 SDValue Op2 = Node->getOperand(1);
9469 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
9470 Node->getFlags());
9471 }
9472
9473 return SDValue();
9474}
9475
9477 SelectionDAG &DAG) const {
9478 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
9479 return Expanded;
9480
9481 EVT VT = Node->getValueType(0);
9482 if (VT.isScalableVector())
9484 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
9485
9486 SDLoc dl(Node);
9487 unsigned NewOp =
9489
9490 if (isOperationLegalOrCustom(NewOp, VT)) {
9491 SDValue Quiet0 = Node->getOperand(0);
9492 SDValue Quiet1 = Node->getOperand(1);
9493
9494 if (!Node->getFlags().hasNoNaNs()) {
9495 // Insert canonicalizes if it's possible we need to quiet to get correct
9496 // sNaN behavior.
9497 if (!DAG.isKnownNeverSNaN(Quiet0)) {
9498 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
9499 Node->getFlags());
9500 }
9501 if (!DAG.isKnownNeverSNaN(Quiet1)) {
9502 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
9503 Node->getFlags());
9504 }
9505 }
9506
9507 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
9508 }
9509
9510 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
9511 // instead if there are no NaNs.
9512 if (Node->getFlags().hasNoNaNs() ||
9513 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
9514 DAG.isKnownNeverNaN(Node->getOperand(1)))) {
9515 unsigned IEEE2018Op =
9516 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
9517 if (isOperationLegalOrCustom(IEEE2018Op, VT))
9518 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
9519 Node->getOperand(1), Node->getFlags());
9520 }
9521
9523 return SelCC;
9524
9525 return SDValue();
9526}
9527
9529 SelectionDAG &DAG) const {
9530 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
9531 return Expanded;
9532
9533 SDLoc DL(N);
9534 SDValue LHS = N->getOperand(0);
9535 SDValue RHS = N->getOperand(1);
9536 unsigned Opc = N->getOpcode();
9537 EVT VT = N->getValueType(0);
9538 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9539 bool IsMax = Opc == ISD::FMAXIMUM;
9540 SDNodeFlags Flags = N->getFlags();
9541
9542 // First, implement comparison not propagating NaN. If no native fmin or fmax
9543 // available, use plain select with setcc instead.
9545 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9546 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
9547
9548 // FIXME: We should probably define fminnum/fmaxnum variants with correct
9549 // signed zero behavior.
9550 bool MinMaxMustRespectOrderedZero = false;
9551
9552 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
9553 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
9554 MinMaxMustRespectOrderedZero = true;
9555 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
9556 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
9557 } else {
9559 return DAG.UnrollVectorOp(N);
9560
9561 // NaN (if exists) will be propagated later, so orderness doesn't matter.
9562 SDValue Compare =
9563 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
9564 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
9565 }
9566
9567 // Propagate any NaN of both operands
9568 if (!N->getFlags().hasNoNaNs() &&
9569 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
9570 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
9572 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
9573 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
9574 }
9575
9576 // fminimum/fmaximum requires -0.0 less than +0.0
9577 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
9578 !DAG.isKnownNeverLogicalZero(RHS) && !DAG.isKnownNeverLogicalZero(LHS)) {
9579 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
9580 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
9581 SDValue TestZero =
9582 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
9583 SDValue LCmp = DAG.getSelect(
9584 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
9585 MinMax, Flags);
9586 SDValue RCmp = DAG.getSelect(
9587 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
9588 LCmp, Flags);
9589 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
9590 }
9591
9592 return MinMax;
9593}
9594
9596 SelectionDAG &DAG) const {
9597 SDLoc DL(Node);
9598 SDValue LHS = Node->getOperand(0);
9599 SDValue RHS = Node->getOperand(1);
9600 unsigned Opc = Node->getOpcode();
9601 EVT VT = Node->getValueType(0);
9602 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9603 bool IsMax = Opc == ISD::FMAXIMUMNUM;
9604 SDNodeFlags Flags = Node->getFlags();
9605
9606 unsigned NewOp =
9608
9609 if (isOperationLegalOrCustom(NewOp, VT)) {
9610 if (!Flags.hasNoNaNs()) {
9611 // Insert canonicalizes if it's possible we need to quiet to get correct
9612 // sNaN behavior.
9613 if (!DAG.isKnownNeverSNaN(LHS)) {
9614 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
9615 }
9616 if (!DAG.isKnownNeverSNaN(RHS)) {
9617 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
9618 }
9619 }
9620
9621 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
9622 }
9623
9624 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
9625 // same behaviors for all of other cases: +0.0 vs -0.0 included.
9626 if (Flags.hasNoNaNs() ||
9627 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
9628 unsigned IEEE2019Op =
9630 if (isOperationLegalOrCustom(IEEE2019Op, VT))
9631 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
9632 }
9633
9634 // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
9635 // either one for +0.0 vs -0.0.
9636 if ((Flags.hasNoNaNs() ||
9637 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
9638 (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
9639 DAG.isKnownNeverLogicalZero(RHS))) {
9640 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
9641 if (isOperationLegalOrCustom(IEEE2008Op, VT))
9642 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
9643 }
9644
9645 if (VT.isVector() &&
9648 return DAG.UnrollVectorOp(Node);
9649
9650 // If only one operand is NaN, override it with another operand.
9651 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
9652 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
9653 }
9654 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
9655 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
9656 }
9657
9658 // Always prefer RHS if equal.
9659 SDValue MinMax =
9660 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
9661
9662 // TODO: We need quiet sNaN if strictfp.
9663
9664 // Fixup signed zero behavior.
9665 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
9666 DAG.isKnownNeverLogicalZero(RHS)) {
9667 return MinMax;
9668 }
9669 SDValue TestZero =
9670 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
9671 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
9672 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
9673 EVT IntVT = VT.changeTypeToInteger();
9674 EVT FloatVT = VT.changeElementType(*DAG.getContext(), MVT::f32);
9675 SDValue LHSTrunc = LHS;
9677 LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
9678 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
9679 }
9680 // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
9681 // we preferred RHS when generate MinMax, if the operands are equal.
9682 SDValue RetZero = DAG.getSelect(
9683 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
9684 MinMax, Flags);
9685 return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
9686}
9687
9688/// Returns a true value if if this FPClassTest can be performed with an ordered
9689/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
9690/// std::nullopt if it cannot be performed as a compare with 0.
9691static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
9692 const fltSemantics &Semantics,
9693 const MachineFunction &MF) {
9694 FPClassTest OrderedMask = Test & ~fcNan;
9695 FPClassTest NanTest = Test & fcNan;
9696 bool IsOrdered = NanTest == fcNone;
9697 bool IsUnordered = NanTest == fcNan;
9698
9699 // Skip cases that are testing for only a qnan or snan.
9700 if (!IsOrdered && !IsUnordered)
9701 return std::nullopt;
9702
9703 if (OrderedMask == fcZero &&
9704 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
9705 return IsOrdered;
9706 if (OrderedMask == (fcZero | fcSubnormal) &&
9707 MF.getDenormalMode(Semantics).inputsAreZero())
9708 return IsOrdered;
9709 return std::nullopt;
9710}
9711
// Expand an ISD::IS_FPCLASS-style floating-point classification of Op against
// OrigTestMask into DAG nodes. Two strategies are visible below: (1) when FP
// exceptions may be ignored (Flags.hasNoFPExcept()), selected masks lower to
// plain FP compares (x == 0, x != x, fabs(x) == inf, ...); (2) otherwise the
// value is bitcast to an integer of the same width and classified with
// integer mask/compare arithmetic, with special handling for x86 f80's
// explicit integer bit.
//
// NOTE(review): this chunk is a line-numbered doxygen rendering; the opening
// signature line (orig. 9712) and several condition lines (orig. 9743, 9780,
// 9790-9791, 9793, 9939) are elided by the extraction. Dangling `&&` / `||`
// expressions below are NOT complete as shown — verify against upstream
// llvm-project before editing.
 9713                                         const FPClassTest OrigTestMask,
 9714                                         SDNodeFlags Flags, const SDLoc &DL,
 9715                                         SelectionDAG &DAG) const {
 9716  EVT OperandVT = Op.getValueType();
 9717  assert(OperandVT.isFloatingPoint());
 9718  FPClassTest Test = OrigTestMask;
 9719
 9720  // Degenerated cases.
 9721  if (Test == fcNone)
 9722    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
 9723  if (Test == fcAllFlags)
 9724    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
 9725
 9726  // PPC double double is a pair of doubles, of which the higher part determines
 9727  // the value class.
 9728  if (OperandVT == MVT::ppcf128) {
 9729    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
 9730                     DAG.getConstant(1, DL, MVT::i32));
 9731    OperandVT = MVT::f64;
 9732  }
 9733
 9734  // Floating-point type properties.
 9735  EVT ScalarFloatVT = OperandVT.getScalarType();
 9736  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
 9737  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
 9738  bool IsF80 = (ScalarFloatVT == MVT::f80);
 9739
 9740  // Some checks can be implemented using float comparisons, if floating point
 9741  // exceptions are ignored.
 9742  if (Flags.hasNoFPExcept() &&
 9744    FPClassTest FPTestMask = Test;
 9745    bool IsInvertedFP = false;
 9746
 9747    // Prefer testing the inverted mask when it is structurally simpler; the
 9748    // compare predicates below are flipped to compensate.
 9747    if (FPClassTest InvertedFPCheck =
 9748            invertFPClassTestIfSimpler(FPTestMask, true)) {
 9749      FPTestMask = InvertedFPCheck;
 9750      IsInvertedFP = true;
 9751    }
 9752
 9753    ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
 9754    ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
 9755
 9756    // See if we can fold an | fcNan into an unordered compare.
 9757    FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
 9758
 9759    // Can't fold the ordered check if we're only testing for snan or qnan
 9760    // individually.
 9761    if ((FPTestMask & fcNan) != fcNan)
 9762      OrderedFPTestMask = FPTestMask;
 9763
 9764    const bool IsOrdered = FPTestMask == OrderedFPTestMask;
 9765
 9766    if (std::optional<bool> IsCmp0 =
 9767            isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
 9768        IsCmp0 && (isCondCodeLegalOrCustom(
 9769                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
 9770                      OperandVT.getScalarType().getSimpleVT()))) {
 9771
 9772      // If denormals could be implicitly treated as 0, this is not equivalent
 9773      // to a compare with 0 since it will also be true for denormals.
 9774      return DAG.getSetCC(DL, ResultVT, Op,
 9775                          DAG.getConstantFP(0.0, DL, OperandVT),
 9776                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
 9777    }
 9778
 9779    // isnan(x) lowers to the self-compare x != x (unordered) when legal.
 9779    if (FPTestMask == fcNan &&
 9781                                OperandVT.getScalarType().getSimpleVT()))
 9782      return DAG.getSetCC(DL, ResultVT, Op, Op,
 9783                          IsInvertedFP ? ISD::SETO : ISD::SETUO);
 9784
 9785    bool IsOrderedInf = FPTestMask == fcInf;
 9786    if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
 9787        isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
 9788                                             : UnorderedCmpOpcode,
 9789                                OperandVT.getScalarType().getSimpleVT()) &&
 9792        (OperandVT.isVector() &&
 9794      // isinf(x) --> fabs(x) == inf
 9795      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
 9796      SDValue Inf =
 9797          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
 9798      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
 9799                          IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
 9800    }
 9801
 9802    if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
 9803        isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
 9804                                          : UnorderedCmpOpcode,
 9805                                OperandVT.getSimpleVT())) {
 9806      // isposinf(x) --> x == inf
 9807      // isneginf(x) --> x == -inf
 9808      // isposinf(x) || nan --> x u== inf
 9809      // isneginf(x) || nan --> x u== -inf
 9810
 9811      SDValue Inf = DAG.getConstantFP(
 9812          APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
 9813          OperandVT);
 9814      return DAG.getSetCC(DL, ResultVT, Op, Inf,
 9815                          IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
 9816    }
 9817
 9818    if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
 9819      // TODO: Could handle ordered case, but it produces worse code for
 9820      // x86. Maybe handle ordered if fabs is free?
 9821
 9822      ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
 9823      ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
 9824
 9825      if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
 9826                                  OperandVT.getScalarType().getSimpleVT())) {
 9827        // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
 9828
 9829        // TODO: Maybe only makes sense if fabs is free. Integer test of
 9830        // exponent bits seems better for x86.
 9831        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
 9832        SDValue SmallestNormal = DAG.getConstantFP(
 9833            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
 9834        return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
 9835                            IsOrdered ? OrderedOp : UnorderedOp);
 9836      }
 9837    }
 9838
 9839    if (FPTestMask == fcNormal) {
 9840      // TODO: Handle unordered
 9841      ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
 9842      ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
 9843
 9844      if (isCondCodeLegalOrCustom(IsFiniteOp,
 9845                                  OperandVT.getScalarType().getSimpleVT()) &&
 9846          isCondCodeLegalOrCustom(IsNormalOp,
 9847                                  OperandVT.getScalarType().getSimpleVT()) &&
 9848          isFAbsFree(OperandVT)) {
 9849        // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
 9850        SDValue Inf =
 9851            DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
 9852        SDValue SmallestNormal = DAG.getConstantFP(
 9853            APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
 9854
 9855        SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
 9856        SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
 9857        SDValue IsNormal =
 9858            DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
 9859        unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
 9860        return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
 9861      }
 9862    }
 9863  }
 9864
 9865  // Some checks may be represented as inversion of simpler check, for example
 9866  // "inf|normal|subnormal|zero" => !"nan".
 9867  bool IsInverted = false;
 9868
 9869  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
 9870    Test = InvertedCheck;
 9871    IsInverted = true;
 9872  }
 9873
 9874  // In the general case use integer operations.
 9875  unsigned BitSize = OperandVT.getScalarSizeInBits();
 9876  EVT IntVT = OperandVT.changeElementType(
 9877      *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), BitSize));
 9878  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
 9879
 9880  // Various masks.
 9881  APInt SignBit = APInt::getSignMask(BitSize);
 9882  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
 9883  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
 9884  const unsigned ExplicitIntBitInF80 = 63;
 9885  APInt ExpMask = Inf;
 9886  if (IsF80)
 9887    ExpMask.clearBit(ExplicitIntBitInF80);
 9888  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
 9889  APInt QNaNBitMask =
 9890      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
 9891  APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
 9892
 9893  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
 9894  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
 9895  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
 9896  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
 9897  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
 9898  SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
 9899
 9900  SDValue Res;
 9901  // OR a newly computed partial classification result into the accumulator.
 9901  const auto appendResult = [&](SDValue PartialRes) {
 9902    if (PartialRes) {
 9903      if (Res)
 9904        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
 9905      else
 9906        Res = PartialRes;
 9907    }
 9908  };
 9909
 9910  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
 9911  // Lazily materialize the "f80 explicit integer bit is set" test; cached so
 9911  // multiple class checks share one computation.
 9911  const auto getIntBitIsSet = [&]() -> SDValue {
 9912    if (!IntBitIsSetV) {
 9913      APInt IntBitMask(BitSize, 0);
 9914      IntBitMask.setBit(ExplicitIntBitInF80);
 9915      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
 9916      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
 9917      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
 9918    }
 9919    return IntBitIsSetV;
 9920  };
 9921
 9922  // Split the value into sign bit and absolute value.
 9923  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
 9924  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
 9925                               DAG.getConstant(0, DL, IntVT), ISD::SETLT);
 9926
 9927  // Tests that involve more than one class should be processed first.
 9928  SDValue PartialRes;
 9929
 9930  if (IsF80)
 9931    ; // Detect finite numbers of f80 by checking individual classes because
 9932      // they have different settings of the explicit integer bit.
 9933  else if ((Test & fcFinite) == fcFinite) {
 9934    // finite(V) ==> (a << 1) < (inf << 1)
 9935    //
 9936    // See https://github.com/llvm/llvm-project/issues/169270, this is slightly
 9937    // shorter than the `finite(V) ==> abs(V) < exp_mask` formula used before.
 9938
 9940           "finite check requires IEEE-like FP");
 9941
 9942    // Shifting left by one discards the sign bit, so an unsigned compare of
 9943    // (V << 1) against (exp_mask << 1) tests "all exponent bits not all-ones".
 9942    SDValue One = DAG.getShiftAmountConstant(1, IntVT, DL);
 9943    SDValue TwiceOp = DAG.getNode(ISD::SHL, DL, IntVT, OpAsInt, One);
 9944    SDValue TwiceInf = DAG.getNode(ISD::SHL, DL, IntVT, ExpMaskV, One);
 9945
 9946    PartialRes = DAG.getSetCC(DL, ResultVT, TwiceOp, TwiceInf, ISD::SETULT);
 9947    Test &= ~fcFinite;
 9948  } else if ((Test & fcFinite) == fcPosFinite) {
 9949    // finite(V) && V > 0 ==> V < exp_mask
 9950    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
 9951    Test &= ~fcPosFinite;
 9952  } else if ((Test & fcFinite) == fcNegFinite) {
 9953    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
 9954    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
 9955    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
 9956    Test &= ~fcNegFinite;
 9957  }
 9958  appendResult(PartialRes);
 9959
 9960  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
 9961    // fcZero | fcSubnormal => test all exponent bits are 0
 9962    // TODO: Handle sign bit specific cases
 9963    if (PartialCheck == (fcZero | fcSubnormal)) {
 9964      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
 9965      SDValue ExpIsZero =
 9966          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
 9967      appendResult(ExpIsZero);
 9968      Test &= ~PartialCheck & fcAllFlags;
 9969    }
 9970  }
 9971
 9972  // Check for individual classes.
 9973
 9974  if (unsigned PartialCheck = Test & fcZero) {
 9975    if (PartialCheck == fcPosZero)
 9976      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
 9977    else if (PartialCheck == fcZero)
 9978      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
 9979    else // ISD::fcNegZero
 9980      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
 9981    appendResult(PartialRes);
 9982  }
 9983
 9984  if (unsigned PartialCheck = Test & fcSubnormal) {
 9985    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
 9986    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
 9987    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
 9988    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
 9989    SDValue VMinusOneV =
 9990        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
 9991    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
 9992    if (PartialCheck == fcNegSubnormal)
 9993      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
 9994    appendResult(PartialRes);
 9995  }
 9996
 9997  if (unsigned PartialCheck = Test & fcInf) {
 9998    if (PartialCheck == fcPosInf)
 9999      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
10000    else if (PartialCheck == fcInf)
10001      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
10002    else { // ISD::fcNegInf
10003      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
10004      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
10005      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
10006    }
10007    appendResult(PartialRes);
10008  }
10009
10010  if (unsigned PartialCheck = Test & fcNan) {
10011    APInt InfWithQnanBit = Inf | QNaNBitMask;
10012    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
10013    if (PartialCheck == fcNan) {
10014      // isnan(V) ==> abs(V) > int(inf)
10015      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
10016      if (IsF80) {
10017        // Recognize unsupported values as NaNs for compatibility with glibc.
10018        // In them (exp(V)==0) == int_bit.
10019        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
10020        SDValue ExpIsZero =
10021            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
10022        SDValue IsPseudo =
10023            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
10024        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
10025      }
10026    } else if (PartialCheck == fcQNan) {
10027      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
10028      PartialRes =
10029          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
10030    } else { // ISD::fcSNan
10031      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
10032      //                    abs(V) < (unsigned(Inf) | quiet_bit)
10033      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
10034      SDValue IsNotQnan =
10035          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
10036      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
10037    }
10038    appendResult(PartialRes);
10039  }
10040
10041  if (unsigned PartialCheck = Test & fcNormal) {
10042    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
10043    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10044    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
10045    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
10046    APInt ExpLimit = ExpMask - ExpLSB;
10047    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
10048    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
10049    if (PartialCheck == fcNegNormal)
10050      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
10051    else if (PartialCheck == fcPosNormal) {
10052      SDValue PosSignV =
10053          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
10054      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
10055    }
10056    if (IsF80)
10057      PartialRes =
10058          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
10059    appendResult(PartialRes);
10060  }
10061
10062  if (!Res)
10063    return DAG.getConstant(IsInverted, DL, ResultVT);
10064  // If we lowered the inverted (simpler) test, flip the result back.
10064  if (IsInverted)
10065    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
10066  return Res;
10067}
10068
10069// Only expand vector types if we have the appropriate vector bit operations.
// Returns true when the bit-parallel CTPOP expansion is expressible for vector
// type VT: ADD (and MUL for element widths > 8) must be legal or custom.
// NOTE(review): two condition lines (orig. 10074-10075, presumably SRL/AND
// legality checks) are elided in this rendering — verify against upstream.
10070static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
10071  assert(VT.isVector() && "Expected vector type");
10072  unsigned Len = VT.getScalarSizeInBits();
10073  return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
10076         (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
10078}
10079
// Expand CTPOP (population count) via the bit-parallel "best" algorithm from
// the Stanford bithacks page (see URL below): pairwise sums at widths
// 1/2/4 bits, then a horizontal byte sum by multiply-and-shift (or a
// shift-add ladder when MUL is not preferred). Supports element widths that
// are multiples of 8 up to 128 bits; returns an empty SDValue otherwise.
// NOTE(review): the function signature line (orig. 10080) and the MUL
// legality condition (orig. 10141-10142) are elided in this rendering.
10081  SDLoc dl(Node);
10082  EVT VT = Node->getValueType(0);
10083  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10084  SDValue Op = Node->getOperand(0);
10085  unsigned Len = VT.getScalarSizeInBits();
10086  assert(VT.isInteger() && "CTPOP not implemented for this type.");
10087
10088  // TODO: Add support for irregular type lengths.
10089  if (!(Len <= 128 && Len % 8 == 0))
10090    return SDValue();
10091
10092  // Only expand vector types if we have the appropriate vector bit operations.
10093  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
10094    return SDValue();
10095
10096  // This is the "best" algorithm from
10097  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
10098  SDValue Mask55 =
10099      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
10100  SDValue Mask33 =
10101      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
10102  SDValue Mask0F =
10103      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
10104
10105  // v = v - ((v >> 1) & 0x55555555...)
10106  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
10107                   DAG.getNode(ISD::AND, dl, VT,
10108                               DAG.getNode(ISD::SRL, dl, VT, Op,
10109                                           DAG.getConstant(1, dl, ShVT)),
10110                               Mask55));
10111  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10112  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
10113                   DAG.getNode(ISD::AND, dl, VT,
10114                               DAG.getNode(ISD::SRL, dl, VT, Op,
10115                                           DAG.getConstant(2, dl, ShVT)),
10116                               Mask33));
10117  // v = (v + (v >> 4)) & 0x0F0F0F0F...
10118  Op = DAG.getNode(ISD::AND, dl, VT,
10119                   DAG.getNode(ISD::ADD, dl, VT, Op,
10120                               DAG.getNode(ISD::SRL, dl, VT, Op,
10121                                           DAG.getConstant(4, dl, ShVT))),
10122                   Mask0F);
10123
10124  // At 8 bits the per-byte counts are already the final answer.
10124  if (Len <= 8)
10125    return Op;
10126
10127  // Avoid the multiply if we only have 2 bytes to add.
10128  // TODO: Only doing this for scalars because vectors weren't as obviously
10129  // improved.
10130  if (Len == 16 && !VT.isVector()) {
10131    // v = (v + (v >> 8)) & 0x00FF;
10132    return DAG.getNode(ISD::AND, dl, VT,
10133                       DAG.getNode(ISD::ADD, dl, VT, Op,
10134                                   DAG.getNode(ISD::SRL, dl, VT, Op,
10135                                               DAG.getConstant(8, dl, ShVT))),
10136                       DAG.getConstant(0xFF, dl, VT));
10137  }
10138
10139  // v = (v * 0x01010101...) >> (Len - 8)
10140  SDValue V;
10143    SDValue Mask01 =
10144        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
10145    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
10146  } else {
10147    // No usable MUL: sum the byte counts with a log2(Len/8) shift-add ladder.
10147    V = Op;
10148    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10149      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
10150      V = DAG.getNode(ISD::ADD, dl, VT, V,
10151                      DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
10152    }
10153  }
10154  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
10155}
10156
// Vector-predicated variant of expandCTPOP: same bithack algorithm, but every
// DAG node is the ISD::VP_* form threading the Mask and VL (explicit vector
// length) operands from the VP_CTPOP node.
// NOTE(review): the signature line (orig. 10156-10157) and the VP_MUL
// legality check's first line (orig. 10208) are elided in this rendering.
10158  SDLoc dl(Node);
10159  EVT VT = Node->getValueType(0);
10160  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10161  SDValue Op = Node->getOperand(0);
10162  SDValue Mask = Node->getOperand(1);
10163  SDValue VL = Node->getOperand(2);
10164  unsigned Len = VT.getScalarSizeInBits();
10165  assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
10166
10167  // TODO: Add support for irregular type lengths.
10168  if (!(Len <= 128 && Len % 8 == 0))
10169    return SDValue();
10170
10171  // This is same algorithm of expandCTPOP from
10172  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
10173  SDValue Mask55 =
10174      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
10175  SDValue Mask33 =
10176      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
10177  SDValue Mask0F =
10178      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
10179
10180  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
10181
10182  // v = v - ((v >> 1) & 0x55555555...)
10183  Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
10184                     DAG.getNode(ISD::VP_SRL, dl, VT, Op,
10185                                 DAG.getConstant(1, dl, ShVT), Mask, VL),
10186                     Mask55, Mask, VL);
10187  Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
10188
10189  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10190  Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
10191  Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
10192                     DAG.getNode(ISD::VP_SRL, dl, VT, Op,
10193                                 DAG.getConstant(2, dl, ShVT), Mask, VL),
10194                     Mask33, Mask, VL);
10195  Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
10196
10197  // v = (v + (v >> 4)) & 0x0F0F0F0F...
10198  // NOTE(review): comma operator joins these two statements in the original.
10198  Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
10199                     Mask, VL),
10200  Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
10201  Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
10202
10203  if (Len <= 8)
10204    return Op;
10205
10206  // v = (v * 0x01010101...) >> (Len - 8)
10207  SDValue V;
10209          ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
10210    SDValue Mask01 =
10211        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
10212    V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
10213  } else {
10214    // No usable VP_MUL: accumulate byte counts with a shift-add ladder.
10214    V = Op;
10215    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10216      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
10217      V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
10218                      DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
10219                      Mask, VL);
10220    }
10221  }
10222  return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
10223                     Mask, VL);
10224}
10225
// Expand CTLZ / CTLZ_ZERO_POISON. Strategy: delegate to the other CTLZ
// flavor when it is supported; otherwise smear the topmost set bit rightward
// with an OR/shift cascade and count the remaining zeros via CTPOP of the
// complement ("Hacker's Delight" technique).
// NOTE(review): the signature line (orig. 10226) and several legality-check
// lines (orig. 10235, 10239, 10252-10255) are elided in this rendering, so
// the conditions at 10234/10238/10250 are incomplete as shown.
10227  SDLoc dl(Node);
10228  EVT VT = Node->getValueType(0);
10229  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10230  SDValue Op = Node->getOperand(0);
10231  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10232
10233  // If the non-ZERO_POISON version is supported we can use that instead.
10234  if (Node->getOpcode() == ISD::CTLZ_ZERO_POISON &&
10236    return DAG.getNode(ISD::CTLZ, dl, VT, Op);
10237
10238  // If the ZERO_POISON version is supported use that and handle the zero case.
10240    EVT SetCCVT =
10241        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10242    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, VT, Op);
10243    SDValue Zero = DAG.getConstant(0, dl, VT);
10244    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
10245    // ctlz(0) is defined as the bit width, so select it explicitly.
10245    return DAG.getSelect(dl, VT, SrcIsZero,
10246                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
10247  }
10248
10249  // Only expand vector types if we have the appropriate vector bit operations.
10250  // This includes the operations needed to expand CTPOP if it isn't supported.
10251  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10253                         !canExpandVectorCTPOP(*this, VT)) ||
10256    return SDValue();
10257
10258  // for now, we do this:
10259  // x = x | (x >> 1);
10260  // x = x | (x >> 2);
10261  // ...
10262  // x = x | (x >>16);
10263  // x = x | (x >>32); // for 64-bit input
10264  // return popcount(~x);
10265  //
10266  // Ref: "Hacker's Delight" by Henry Warren
10267  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10268    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
10269    Op = DAG.getNode(ISD::OR, dl, VT, Op,
10270                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
10271  }
10272  Op = DAG.getNOT(dl, Op, VT);
10273  return DAG.getNode(ISD::CTPOP, dl, VT, Op);
10274}
10275
// Vector-predicated CTLZ expansion: smear the highest set bit with a VP_OR /
// VP_SRL cascade, complement (VP_XOR with all-ones), and count with VP_CTPOP.
// Mask and VL from the VP node are threaded through every operation.
// NOTE(review): the signature line (orig. 10276) is elided in this rendering.
10277  SDLoc dl(Node);
10278  EVT VT = Node->getValueType(0);
10279  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10280  SDValue Op = Node->getOperand(0);
10281  SDValue Mask = Node->getOperand(1);
10282  SDValue VL = Node->getOperand(2);
10283  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10284
10285  // do this:
10286  // x = x | (x >> 1);
10287  // x = x | (x >> 2);
10288  // ...
10289  // x = x | (x >>16);
10290  // x = x | (x >>32); // for 64-bit input
10291  // return popcount(~x);
10292  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10293    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
10294    Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
10295                     DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
10296                     VL);
10297  }
10298  Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
10299                   Mask, VL);
10300  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
10301}
10302
// Expand a count-leading-sign-bits (CTLS) style node in terms of
// CTLZ_ZERO_POISON, per the formula in the comment below: XOR with the
// arithmetic sign-smear turns redundant sign bits into leading zeros, and the
// trailing OR with 1 guarantees a nonzero CTLZ input (so ZERO_POISON is safe).
// The operand is frozen because it is used twice (Op and its SRA).
// NOTE(review): the signature line (orig. 10302-10303) is elided here.
10304  SDLoc dl(Node);
10305  EVT VT = Node->getValueType(0);
10306  SDValue Op = DAG.getFreeze(Node->getOperand(0));
10307  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10308
10309  // CTLS(x) = CTLZ(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1))
10310  // This transforms the sign bits into leading zeros that can be counted.
10311  SDValue ShiftAmt = DAG.getShiftAmountConstant(NumBitsPerElt - 1, VT, dl);
10312  SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, Op, ShiftAmt);
10313  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, SignBit);
10314  SDValue Shl =
10315      DAG.getNode(ISD::SHL, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
10316  SDValue Or = DAG.getNode(ISD::OR, dl, VT, Shl, DAG.getConstant(1, dl, VT));
10317  return DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, VT, Or);
10318}
10319
// Lower CTTZ via a de Bruijn multiply + constant-pool table lookup (32/64-bit
// scalars only): isolate the lowest set bit with (x & -x), multiply by a de
// Bruijn constant, use the top log2(BitWidth) bits as an index into a
// precomputed byte table loaded with ZEXTLOAD. For plain CTTZ (not
// ZERO_POISON) a final select maps the zero input to BitWidth.
// NOTE(review): the signature's first line (orig. 10320) and two interior
// lines (orig. 10327 — an early-out condition; 10341-10343 — the Table
// declaration) are elided in this rendering.
10321                                        const SDLoc &DL, EVT VT, SDValue Op,
10322                                        unsigned BitWidth) const {
10323  if (BitWidth != 32 && BitWidth != 64)
10324    return SDValue();
10325
10326  const DataLayout &TD = DAG.getDataLayout();
10328    return SDValue();
10329
10330  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
10331                                  : APInt(64, 0x0218A392CD3D5DBFULL);
10332  MachinePointerInfo PtrInfo =
10334  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
10335  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
10336  SDValue Lookup = DAG.getNode(
10337      ISD::SRL, DL, VT,
10338      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
10339                  DAG.getConstant(DeBruijn, DL, VT)),
10340      DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
10341
10343  // Populate the table: entry at each de Bruijn-derived index is the bit
10343  // position whose isolated bit produces that index.
10344  for (unsigned i = 0; i < BitWidth; i++) {
10345    APInt Shl = DeBruijn.shl(i);
10346    APInt Lshr = Shl.lshr(ShiftAmt);
10347    Table[Lshr.getZExtValue()] = i;
10348  }
10349
10350  // Create a ConstantArray in Constant Pool
10351  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
10352  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
10353                                      TD.getPrefTypeAlign(CA->getType()));
10354  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
10355                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
10356                                   PtrInfo, MVT::i8);
10357  if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON)
10358    return ExtLoad;
10359
10360  EVT SetCCVT =
10361      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10362  SDValue Zero = DAG.getConstant(0, DL, VT);
10363  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
10364  return DAG.getSelect(DL, VT, SrcIsZero,
10365                       DAG.getConstant(BitWidth, DL, VT), ExtLoad);
10366}
10367
// Expand CTTZ / CTTZ_ZERO_POISON. Preference order visible below: the other
// CTTZ flavor if supported; a de Bruijn table lookup when CTPOP would itself
// be expanded or become a libcall; otherwise popcount(~x & (x - 1)), falling
// back to BitWidth - ctlz(~x & (x - 1)) when CTLZ is available but CTPOP is
// not ("Hacker's Delight").
// NOTE(review): the signature line (orig. 10368) and several legality-check
// lines (orig. 10376, 10380, 10393-10398, 10403-10404, 10417) are elided in
// this rendering, so the conditions shown here are incomplete.
10369  SDLoc dl(Node);
10370  EVT VT = Node->getValueType(0);
10371  SDValue Op = Node->getOperand(0);
10372  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10373
10374  // If the non-ZERO_POISON version is supported we can use that instead.
10375  if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON &&
10377    return DAG.getNode(ISD::CTTZ, dl, VT, Op);
10378
10379  // If the ZERO_POISON version is supported use that and handle the zero case.
10381    EVT SetCCVT =
10382        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10383    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_POISON, dl, VT, Op);
10384    SDValue Zero = DAG.getConstant(0, dl, VT);
10385    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
10386    // cttz(0) is defined as the bit width, so select it explicitly.
10386    return DAG.getSelect(dl, VT, SrcIsZero,
10387                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
10388  }
10389
10390  // Only expand vector types if we have the appropriate vector bit operations.
10391  // This includes the operations needed to expand CTPOP if it isn't supported.
10392  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10395                         !canExpandVectorCTPOP(*this, VT)) ||
10399    return SDValue();
10400
10401  // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
10402  // to be expanded or converted to a libcall.
10405    if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
10406      return V;
10407
10408  // for now, we use: { return popcount(~x & (x - 1)); }
10409  // unless the target has ctlz but not ctpop, in which case we use:
10410  // { return 32 - nlz(~x & (x-1)); }
10411  // Ref: "Hacker's Delight" by Henry Warren
10412  SDValue Tmp = DAG.getNode(
10413      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
10414      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
10415
10416  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
10418    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
10419                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
10420  }
10421
10422  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
10423}
10424
// Vector-predicated CTTZ expansion: popcount(~x & (x - 1)) built entirely
// from VP_* nodes, threading the node's Mask and VL operands through each op.
// NOTE(review): the signature line (orig. 10425) is elided in this rendering.
10426  SDValue Op = Node->getOperand(0);
10427  SDValue Mask = Node->getOperand(1);
10428  SDValue VL = Node->getOperand(2);
10429  SDLoc dl(Node);
10430  EVT VT = Node->getValueType(0);
10431
10432  // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
10433  SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
10434                            DAG.getAllOnesConstant(dl, VT), Mask, VL);
10435  SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
10436                                 DAG.getConstant(1, dl, VT), Mask, VL);
10437  SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
10438  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
10439}
10440
// Expand a VP "count trailing zero elements" node: convert the source to an
// i1 vector if needed, replace inactive lanes' step-vector values with the
// splatted VL, and take VP_REDUCE_UMIN — the minimum surviving step index is
// the position of the first active element (or VL if none).
// NOTE(review): the signature's first line (orig. 10441) is elided here.
10442                                             SelectionDAG &DAG) const {
10443  // %cond = to_bool_vec %source
10444  // %splat = splat /*val=*/VL
10445  // %tz = step_vector
10446  // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
10447  // %r = vp.reduce.umin %v
10448  SDLoc DL(N);
10449  SDValue Source = N->getOperand(0);
10450  SDValue Mask = N->getOperand(1);
10451  SDValue EVL = N->getOperand(2);
10452  EVT SrcVT = Source.getValueType();
10453  EVT ResVT = N->getValueType(0);
10454  EVT ResVecVT =
10455      EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
10456
10457  // Convert to boolean vector.
10458  if (SrcVT.getScalarType() != MVT::i1) {
10459    SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
10460    SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
10461                             SrcVT.getVectorElementCount());
10462    Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
10463                         DAG.getCondCode(ISD::SETNE), Mask, EVL);
10464  }
10465
10466  SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
10467  SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
10468  SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
10469  SDValue Select =
10470      DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
10471  // ExtEVL is the reduction's start value, so an all-false mask yields EVL.
10471  return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
10472}
10473
10474/// Returns a type-legalized version of \p Mask as the first item in the
10475/// pair. The second item contains a type-legalized step vector that's
10476/// guaranteed to fit the number of elements in \p Mask.
10477/// If the stepvector would require splitting, returns an empty SDValue
10478/// as the second item to signal that the operation should be split instead.
// NOTE(review): the signature's parameter line (orig. 10480, presumably
// `(SDValue Mask, bool ZeroIsPoison, const SDLoc &DL,`) and the TypeAction
// declaration line (orig. 10504) are elided in this rendering.
10479static std::pair<SDValue, SDValue>
10481                          SelectionDAG &DAG) {
10482  EVT MaskVT = Mask.getValueType();
10483  EVT BoolVT = MaskVT.getScalarType();
10484
10485  // Find a suitable type for a stepvector.
10486  // If zero is poison, we can assume the upper limit of the result is VF-1.
10487  ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
10488  if (MaskVT.isScalableVector())
10489    VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
10490  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10491  uint64_t EltWidth = TLI.getBitWidthForCttzElements(
10492      EVT(TLI.getVectorIdxTy(DAG.getDataLayout())),
10493      MaskVT.getVectorElementCount(), ZeroIsPoison, &VScaleRange);
10494  // If the step vector element type is smaller than the mask element type,
10495  // use the mask type directly to avoid widening issues.
10496  EltWidth = std::max(EltWidth, BoolVT.getFixedSizeInBits());
10497  EVT StepVT = MVT::getIntegerVT(EltWidth);
10498  EVT StepVecVT = MaskVT.changeVectorElementType(*DAG.getContext(), StepVT);
10499
10500  // If promotion or widening is required to make the type legal, do it here.
10501  // Promotion of integers within LegalizeVectorOps is looking for types of
10502  // the same size but with a smaller number of larger elements, not the usual
10503  // larger size with the same number of larger elements.
10505      TLI.getTypeAction(*DAG.getContext(), StepVecVT);
10506  SDValue StepVec;
10507  if (TypeAction == TargetLowering::TypePromoteInteger) {
10508    StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10509    StepVec = DAG.getStepVector(DL, StepVecVT);
10510  } else if (TypeAction == TargetLowering::TypeWidenVector) {
10511    // For widening, the element count changes. Create a step vector with only
10512    // the original elements valid and zeros for padding. Also widen the mask.
10513    EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10514    unsigned WideNumElts = WideVecVT.getVectorNumElements();
10515
10516    // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
10517    SDValue OrigStepVec = DAG.getStepVector(DL, StepVecVT);
10518    SDValue UndefStep = DAG.getPOISON(WideVecVT);
10519    StepVec = DAG.getInsertSubvector(DL, UndefStep, OrigStepVec, 0);
10520
10521    // Widen mask: pad with zeros.
10522    EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), BoolVT, WideNumElts);
10523    SDValue ZeroMask = DAG.getConstant(0, DL, WideMaskVT);
10524    Mask = DAG.getInsertSubvector(DL, ZeroMask, Mask, 0);
10525  } else if (TypeAction == TargetLowering::TypeSplitVector) {
10526    // The stepvector type would require splitting. Signal to the caller
10527    // that the operation should be split instead of expanded.
10528    return {Mask, SDValue()};
10529  } else {
10530    StepVec = DAG.getStepVector(DL, StepVecVT);
10531  }
10532
10533  return {Mask, StepVec};
10534}
10535
// Expand ISD::VECTOR_FIND_LAST_ACTIVE: zero out inactive lanes of a legalized
// step vector and VECREDUCE_UMAX the survivors — the max surviving step is
// the index of the last active mask lane. If the step vector would need
// splitting, the mask is split in half, both halves are expanded recursively
// via new VECTOR_FIND_LAST_ACTIVE nodes, and the high half wins whenever any
// of its lanes is active.
// NOTE(review): the signature's first line (orig. 10536) and the `Cond`
// definition line (orig. 10560, presumably `SDValue Cond =
// DAG.getBoolExtOrTrunc(`) are elided in this rendering.
10537                                                     SelectionDAG &DAG) const {
10538  SDLoc DL(N);
10539  auto [Mask, StepVec] = getLegalMaskAndStepVector(
10540      N->getOperand(0), /*ZeroIsPoison=*/true, DL, DAG);
10541
10542  // If StepVec is empty, the stepvector would require splitting.
10543  // Split the operation instead and let it be recursively legalized.
10544  if (!StepVec) {
10545    EVT MaskVT = N->getOperand(0).getValueType();
10546    EVT ResVT = N->getValueType(0);
10547
10548    // Split the mask
10549    auto [LoVT, HiVT] = DAG.GetSplitDestVTs(MaskVT);
10550    auto [MaskLo, MaskHi] = DAG.SplitVector(N->getOperand(0), DL);
10551
10552    // Create split VECTOR_FIND_LAST_ACTIVE operations
10553    SDValue LoResult =
10554        DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskLo);
10555    SDValue HiResult =
10556        DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskHi);
10557
10558    // Check if any lane is active in the high mask.
10559    SDValue AnyHiActive = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, MaskHi);
10561        AnyHiActive, DL,
10562        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i1),
10563        MVT::i1);
10564
10565    // Adjust HiResult by adding the number of elements in Lo
10566    SDValue LoNumElts =
10567        DAG.getElementCount(DL, ResVT, LoVT.getVectorElementCount());
10568    SDValue AdjustedHiResult =
10569        DAG.getNode(ISD::ADD, DL, ResVT, HiResult, LoNumElts);
10570
10571    // Return: AnyHiActive ? AdjustedHiResult : LoResult;
10572    return DAG.getNode(ISD::SELECT, DL, ResVT, Cond, AdjustedHiResult,
10573                       LoResult);
10574  }
10575
10576  EVT StepVecVT = StepVec.getValueType();
10577  EVT StepVT = StepVec.getValueType().getVectorElementType();
10578
10579  // Zero out lanes with inactive elements, then find the highest remaining
10580  // value from the stepvector.
10581  SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
10582  SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
10583  SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
10584  return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
10585}
10586
// Expand an ISD::LOOP_DEPENDENCE_{RAW,WAR}_MASK node into pointer-difference
// arithmetic plus GET_ACTIVE_LANE_MASK: lanes are active while they do not
// overlap the conflicting access. NOTE(review): the opening line of this
// signature (doc-line 10587) is missing from this view — extraction artifact;
// code left byte-identical.
10588 SelectionDAG &DAG) const {
10589 SDLoc DL(N);
10590 EVT VT = N->getValueType(0);
10591 SDValue SourceValue = N->getOperand(0);
10592 SDValue SinkValue = N->getOperand(1);
10593 SDValue EltSizeInBytes = N->getOperand(2);
10594
10595 // Note: The lane offset is scalable if the mask is scalable.
10596 ElementCount LaneOffsetEC =
10597 ElementCount::get(N->getConstantOperandVal(3), VT.isScalableVT());
10598
10599 EVT AddrVT = SourceValue->getValueType(0);
10600 bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;
10601
10602 // Take the difference between the pointers and divide it by the element
10603 // size, to see how many lanes separate them.
10604 SDValue Diff = DAG.getNode(ISD::SUB, DL, AddrVT, SinkValue, SourceValue);
10605 if (IsReadAfterWrite)
10606 Diff = DAG.getNode(ISD::ABS, DL, AddrVT, Diff);
10607 Diff = DAG.getNode(ISD::SDIV, DL, AddrVT, Diff, EltSizeInBytes);
10608
10609 // The pointers do not alias if:
10610 // * Diff <= 0 (WAR_MASK)
10611 // * Diff == 0 (RAW_MASK)
10612 EVT CmpVT =
10613 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), AddrVT);
10614 SDValue Zero = DAG.getConstant(0, DL, AddrVT);
10615 SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
10616 IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
10617
10618 // The pointers do not alias if:
10619 // Lane + LaneOffset < Diff (WAR/RAW_MASK)
10620 SDValue LaneOffset = DAG.getElementCount(DL, AddrVT, LaneOffsetEC);
10621 SDValue MaskN = DAG.getSelect(
10622 DL, AddrVT, Cmp,
// NOTE(review): doc-line 10623 (the select's true operand — presumably an
// all-lanes-active constant in AddrVT, given the trailing "AddrVT)," below)
// is missing from this view — extraction artifact; TODO confirm against the
// upstream source.
10624 AddrVT),
10625 Diff);
10626
10627 return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, VT, LaneOffset, MaskN);
10628 }
10629
// Expand ABS / ABS_MIN_POISON (and negated abs when IsNegative is set) using
// whichever of SMAX/UMIN/SMIN or the shift+xor+sub idiom the target supports.
// NOTE(review): the opening line of this signature (doc-line 10630) is missing
// from this view — extraction artifact; code left byte-identical.
10631 bool IsNegative) const {
10632 SDLoc dl(N);
10633 EVT VT = N->getValueType(0);
10634 SDValue Op = N->getOperand(0);
10635
10636 // If expanding ABS_MIN_POISON, fall back to ABS if the target supports it.
10637 if (N->getOpcode() == ISD::ABS_MIN_POISON &&
// NOTE(review): condition continuation (doc-line 10638) missing — extraction
// artifact. Expected to test target support for ISD::ABS; TODO confirm.
10639 SDValue AbsVal = DAG.getNode(ISD::ABS, dl, VT, Op);
10640 if (IsNegative)
10641 return DAG.getNegative(AbsVal, dl, VT);
10642 return AbsVal;
10643 }
10644
10645 // abs(x) -> smax(x,sub(0,x))
10646 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): condition continuation (doc-line 10647, presumably testing
// ISD::SMAX legality) missing — extraction artifact.
10648 SDValue Zero = DAG.getConstant(0, dl, VT);
// Freeze Op so both uses (operand and subtrahend) observe the same value.
10649 Op = DAG.getFreeze(Op);
10650 return DAG.getNode(ISD::SMAX, dl, VT, Op,
10651 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10652 }
10653
10654 // abs(x) -> umin(x,sub(0,x))
10655 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): condition continuation (doc-line 10656, presumably testing
// ISD::UMIN legality) missing — extraction artifact.
10657 SDValue Zero = DAG.getConstant(0, dl, VT);
10658 Op = DAG.getFreeze(Op);
10659 return DAG.getNode(ISD::UMIN, dl, VT, Op,
10660 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10661 }
10662
10663 // 0 - abs(x) -> smin(x, sub(0,x))
10664 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): condition continuation (doc-line 10665, presumably testing
// ISD::SMIN legality) missing — extraction artifact.
10666 SDValue Zero = DAG.getConstant(0, dl, VT);
10667 Op = DAG.getFreeze(Op);
10668 return DAG.getNode(ISD::SMIN, dl, VT, Op,
10669 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10670 }
10671
10672 // Only expand vector types if we have the appropriate vector operations.
10673 if (VT.isVector() &&
// NOTE(review): doc-line 10674 (first clause of the operation-support check)
// missing — extraction artifact.
10675 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
10676 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
// NOTE(review): doc-line 10677 (final clause, presumably XOR support) missing.
10678 return SDValue();
10679
10680 Op = DAG.getFreeze(Op);
// Shift is the sign mask: all-ones when Op is negative, all-zeros otherwise.
10681 SDValue Shift = DAG.getNode(
10682 ISD::SRA, dl, VT, Op,
10683 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10684 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
10685
10686 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
10687 if (!IsNegative)
10688 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
10689
10690 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
10691 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
10692 }
10693
// Expand ABDS/ABDU (signed/unsigned absolute difference) via, in preference
// order: max-min, usubsat-or, abs(sub) when no overflow, a branchless
// cmp/xor/sub form, a usubo-based form for illegal scalar types, unrolling,
// and finally a select of the two subtraction orders.
// NOTE(review): the opening line of this signature (doc-line 10694) is missing
// from this view — extraction artifact; code left byte-identical.
10695 SDLoc dl(N);
10696 EVT VT = N->getValueType(0);
10697 SDValue LHS = N->getOperand(0);
10698 SDValue RHS = N->getOperand(1);
10699 bool IsSigned = N->getOpcode() == ISD::ABDS;
10700
10701 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
10702 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
10703 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
10704 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
10705 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
// Freeze both operands: each is used twice and must be seen consistently.
10706 LHS = DAG.getFreeze(LHS);
10707 RHS = DAG.getFreeze(RHS);
10708 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
10709 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
10710 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
10711 }
10712
10713 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
10714 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
10715 LHS = DAG.getFreeze(LHS);
10716 RHS = DAG.getFreeze(RHS);
10717 return DAG.getNode(ISD::OR, dl, VT,
10718 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
10719 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
10720 }
10721
10722 // If the subtract doesn't overflow then just use abs(sub())
10723 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
10724
10725 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
10726 return DAG.getNode(ISD::ABS, dl, VT,
10727 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
10728
10729 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
10730 return DAG.getNode(ISD::ABS, dl, VT,
10731 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10732
10733 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// NOTE(review): doc-line 10734 (the definition of 'CC' used by getSetCC below,
// presumably SETGT/SETUGT keyed on IsSigned) is missing — extraction artifact.
10735 LHS = DAG.getFreeze(LHS);
10736 RHS = DAG.getFreeze(RHS);
10737 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
10738
10739 // Branchless expansion iff cmp result is allbits:
10740 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
10741 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
10742 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10743 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
10744 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
10745 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
10746 }
10747
10748 // Similar to the branchless expansion, if we don't prefer selects, use the
10749 // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
10750 // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
10751 // rhs), uof(lhs, rhs)), uof(lhs, rhs))
10752 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
// NOTE(review): condition continuation (doc-line 10753) missing — extraction
// artifact.
10754 SDValue USubO =
10755 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
10756 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
10757 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
10758 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
10759 }
10760
10761 // FIXME: Should really try to split the vector in case it's legal on a
10762 // subvector.
// NOTE(review): doc-line 10763 (the condition guarding the unroll, presumably
// a vector-type / VSELECT-support check) is missing — extraction artifact.
10764 return DAG.UnrollVectorOp(N);
10765
10766 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10767 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10768 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
10769 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10770 }
10771
// Expand AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU. Prefers add+shift when the
// operands are known to be extended, then a widen+add+shift form for scalars,
// a uaddo-based form for illegal AVGFLOORU scalars, and finally the classic
// overflow-free identity sign/xor/shift expansion.
// NOTE(review): the opening line of this signature (doc-line 10772) is missing
// from this view — extraction artifact; code left byte-identical.
10773 SDLoc dl(N);
10774 EVT VT = N->getValueType(0);
10775 SDValue LHS = N->getOperand(0);
10776 SDValue RHS = N->getOperand(1);
10777
10778 unsigned Opc = N->getOpcode();
10779 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
10780 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
10781 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
10782 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
10783 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
10784 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// NOTE(review): doc-line 10785 (the opening of the assert listing the first
// two AVG opcodes) is missing — extraction artifact.
10786 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
10787 "Unknown AVG node");
10788
10789 // If the operands are already extended, we can add+shift.
10790 bool IsExt =
10791 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
10792 DAG.ComputeNumSignBits(RHS) >= 2) ||
10793 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
10794 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
10795 if (IsExt) {
10796 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
// Ceil variants round up: add 1 before the halving shift.
10797 if (!IsFloor)
10798 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
10799 return DAG.getNode(ShiftOpc, dl, VT, Sum,
10800 DAG.getShiftAmountConstant(1, VT, dl));
10801 }
10802
10803 // For scalars, see if we can efficiently extend/truncate to use add+shift.
10804 if (VT.isScalarInteger()) {
10805 EVT ExtVT = VT.widenIntegerElementType(*DAG.getContext());
10806 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
10807 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
10808 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
10809 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
10810 if (!IsFloor)
10811 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
10812 DAG.getConstant(1, dl, ExtVT));
10813 // Just use SRL as we will be truncating away the extended sign bits.
10814 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
10815 DAG.getShiftAmountConstant(1, ExtVT, dl));
10816 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
10817 }
10818 }
10819
10820 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
10821 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT) &&
// NOTE(review): condition continuation (doc-lines 10822-10823) missing —
// extraction artifact.
10824 SDValue UAddWithOverflow =
10825 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
10826
10827 SDValue Sum = UAddWithOverflow.getValue(0);
10828 SDValue Overflow = UAddWithOverflow.getValue(1);
10829
10830 // Right shift the sum by 1
10831 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
10832 DAG.getShiftAmountConstant(1, VT, dl));
10833
// The carry bit becomes the result's top bit after the halving shift.
10834 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
10835 SDValue OverflowShl = DAG.getNode(
10836 ISD::SHL, dl, VT, ZeroExtOverflow,
10837 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10838
10839 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
10840 }
10841
10842 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
10843 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
10844 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
10845 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
10846 LHS = DAG.getFreeze(LHS);
10847 RHS = DAG.getFreeze(RHS);
10848 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
10849 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10850 SDValue Shift =
10851 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
10852 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
10853 }
10854
// Expand BSWAP for simple i16/i32/i64 scalar types via rotates (i16, and an
// ARM-oriented ROTR sequence for i32) or mask+shift+or chains.
// NOTE(review): the opening line of this signature (doc-line 10855) is missing
// from this view — extraction artifact; code left byte-identical.
10856 SDLoc dl(N);
10857 EVT VT = N->getValueType(0);
10858 SDValue Op = N->getOperand(0);
10859
10860 if (!VT.isSimple())
10861 return SDValue();
10862
10863 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10864 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10865 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10866 default:
10867 return SDValue();
10868 case MVT::i16:
10869 // Use a rotate by 8. This can be further expanded if necessary.
10870 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10871 case MVT::i32:
10872 // This is meant for ARM specifically, which has ROTR but no ROTL.
10873 // t = x ^ rotr(x, 16)
10874 // t = bic(t, 0x00ff0000)
10875 // t = lshr(t, 8)
10876 // x = t ^ rotr(x, 8)
// NOTE(review): doc-line 10877 (the guard opening this braced scope,
// presumably a ROTR-support check) is missing — extraction artifact.
10878 SDValue Rotr16 =
10879 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(16, dl, SHVT));
10880 SDValue Tmp = DAG.getNode(ISD::XOR, dl, VT, Op, Rotr16);
10881 Tmp = DAG.getNode(ISD::AND, dl, VT, Tmp,
10882 DAG.getConstant(0xFF00FFFF, dl, VT));
10883 Tmp = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(8, dl, SHVT));
10884 SDValue Rotr8 =
10885 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10886 return DAG.getNode(ISD::XOR, dl, VT, Tmp, Rotr8);
10887 }
// Generic i32 fallback: isolate each byte and recombine in reversed order.
10888 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10889 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
10890 DAG.getConstant(0xFF00, dl, VT));
10891 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
10892 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10893 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
10894 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10895 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10896 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10897 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10898 case MVT::i64:
// Same byte-reversal scheme extended to eight bytes.
10899 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10900 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
10901 DAG.getConstant(255ULL<<8, dl, VT));
10902 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
10903 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
10904 DAG.getConstant(255ULL<<16, dl, VT));
10905 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
10906 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
10907 DAG.getConstant(255ULL<<24, dl, VT));
10908 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
10909 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10910 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
10911 DAG.getConstant(255ULL<<24, dl, VT));
10912 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10913 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
10914 DAG.getConstant(255ULL<<16, dl, VT));
10915 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
10916 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
10917 DAG.getConstant(255ULL<<8, dl, VT));
10918 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10919 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
10920 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
10921 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10922 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10923 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
10924 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10925 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
10926 }
10927 }
10928
// Expand VP_BSWAP (vector-predicated byte swap) using VP shift/and/or nodes;
// same byte-reversal scheme as expandBSWAP but every node carries the mask
// and explicit vector length (EVL) operands.
// NOTE(review): the opening line of this signature (doc-line 10929) is missing
// from this view — extraction artifact; code left byte-identical.
10930 SDLoc dl(N);
10931 EVT VT = N->getValueType(0);
10932 SDValue Op = N->getOperand(0);
10933 SDValue Mask = N->getOperand(1);
10934 SDValue EVL = N->getOperand(2);
10935
10936 if (!VT.isSimple())
10937 return SDValue();
10938
10939 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10940 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10941 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10942 default:
10943 return SDValue();
10944 case MVT::i16:
10945 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10946 Mask, EVL);
10947 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10948 Mask, EVL);
10949 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
10950 case MVT::i32:
10951 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10952 Mask, EVL);
10953 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
10954 Mask, EVL);
10955 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
10956 Mask, EVL);
10957 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10958 Mask, EVL);
10959 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10960 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
10961 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10962 Mask, EVL);
10963 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10964 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10965 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10966 case MVT::i64:
10967 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10968 Mask, EVL);
10969 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10970 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10971 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
10972 Mask, EVL);
10973 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10974 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10975 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
10976 Mask, EVL);
10977 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10978 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10979 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
10980 Mask, EVL);
10981 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10982 Mask, EVL);
10983 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
10984 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10985 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10986 Mask, EVL);
10987 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
10988 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10989 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
10990 Mask, EVL);
10991 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10992 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10993 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10994 Mask, EVL);
10995 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
10996 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
10997 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10998 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10999 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
11000 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
11001 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
11002 }
11003 }
11004
// Expand BITREVERSE. For power-of-two widths >= 8: BSWAP the bytes, then swap
// nibbles, bit-pairs and single bits with repeating masks. Otherwise fall back
// to moving each bit individually.
// NOTE(review): the opening line of this signature (doc-line 11005) is missing
// from this view — extraction artifact; code left byte-identical.
11006 SDLoc dl(N);
11007 EVT VT = N->getValueType(0);
11008 SDValue Op = N->getOperand(0);
11009 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11010 unsigned Sz = VT.getScalarSizeInBits();
11011
11012 SDValue Tmp, Tmp2, Tmp3;
11013
11014 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11015 // and finally the i1 pairs.
11016 // TODO: We can easily support i4/i2 legal types if any target ever does.
11017 if (Sz >= 8 && isPowerOf2_32(Sz)) {
11018 // Create the masks - repeating the pattern every byte.
11019 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
11020 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
11021 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
11022
11023 // BSWAP if the type is wider than a single byte.
11024 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
11025
11026 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11027 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
11028 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
11029 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
11030 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
11031 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11032
11033 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11034 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
11035 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
11036 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
11037 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
11038 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11039
11040 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11041 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
11042 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
11043 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
11044 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
11045 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11046 return Tmp;
11047 }
11048
// Fallback for odd widths: move bit J of the input to bit I of the result,
// one bit at a time (O(Sz) nodes).
11049 Tmp = DAG.getConstant(0, dl, VT);
11050 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
11051 if (I < J)
11052 Tmp2 =
11053 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
11054 else
11055 Tmp2 =
11056 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
11057
11058 APInt Shift = APInt::getOneBitSet(Sz, J);
11059 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
11060 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
11061 }
11062
11063 return Tmp;
11064 }
11065
// Expand VP_BITREVERSE using VP nodes: VP_BSWAP then nibble/pair/bit swaps,
// mirroring expandBITREVERSE. Only power-of-two widths >= 8 are handled; all
// other widths return SDValue() (no VP per-bit fallback is provided).
// NOTE(review): the opening line of this signature (doc-line 11066) is missing
// from this view — extraction artifact; code left byte-identical.
11067 assert(N->getOpcode() == ISD::VP_BITREVERSE);
11068
11069 SDLoc dl(N);
11070 EVT VT = N->getValueType(0);
11071 SDValue Op = N->getOperand(0);
11072 SDValue Mask = N->getOperand(1);
11073 SDValue EVL = N->getOperand(2);
11074 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11075 unsigned Sz = VT.getScalarSizeInBits();
11076
11077 SDValue Tmp, Tmp2, Tmp3;
11078
11079 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11080 // and finally the i1 pairs.
11081 // TODO: We can easily support i4/i2 legal types if any target ever does.
11082 if (Sz >= 8 && isPowerOf2_32(Sz)) {
11083 // Create the masks - repeating the pattern every byte.
11084 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
11085 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
11086 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
11087
11088 // BSWAP if the type is wider than a single byte.
11089 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
11090
11091 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11092 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
11093 Mask, EVL);
11094 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11095 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
11096 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
11097 Mask, EVL);
11098 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
11099 Mask, EVL);
11100 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11101
11102 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11103 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
11104 Mask, EVL);
11105 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11106 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
11107 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
11108 Mask, EVL);
11109 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
11110 Mask, EVL);
11111 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11112
11113 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11114 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
11115 Mask, EVL);
11116 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11117 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
11118 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
11119 Mask, EVL);
11120 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
11121 Mask, EVL);
11122 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11123 return Tmp;
11124 }
11125 return SDValue();
11126 }
11127
// Scalarize a fixed-width vector load into element loads (byte-sized
// elements) or a single wide integer load plus shift/mask extraction
// (sub-byte elements). Returns {vector value, output chain}.
// NOTE(review): the continuation line of this signature (doc-line 11129,
// "TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,") is missing from this
// view — extraction artifact; code left byte-identical.
11128std::pair<SDValue, SDValue>
11130 SelectionDAG &DAG) const {
11131 SDLoc SL(LD);
11132 SDValue Chain = LD->getChain();
11133 SDValue BasePTR = LD->getBasePtr();
11134 EVT SrcVT = LD->getMemoryVT();
11135 EVT DstVT = LD->getValueType(0);
11136 ISD::LoadExtType ExtType = LD->getExtensionType();
11137
11138 if (SrcVT.isScalableVector())
11139 report_fatal_error("Cannot scalarize scalable vector loads");
11140
11141 unsigned NumElem = SrcVT.getVectorNumElements();
11142
11143 EVT SrcEltVT = SrcVT.getScalarType();
11144 EVT DstEltVT = DstVT.getScalarType();
11145
11146 // A vector must always be stored in memory as-is, i.e. without any padding
11147 // between the elements, since various code depend on it, e.g. in the
11148 // handling of a bitcast of a vector type to int, which may be done with a
11149 // vector store followed by an integer load. A vector that does not have
11150 // elements that are byte-sized must therefore be stored as an integer
11151 // built out of the extracted vector elements.
11152 if (!SrcEltVT.isByteSized()) {
11153 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
11154 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
11155
11156 unsigned NumSrcBits = SrcVT.getSizeInBits();
11157 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
11158
11159 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
11160 SDValue SrcEltBitMask = DAG.getConstant(
11161 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
11162
11163 // Load the whole vector and avoid masking off the top bits as it makes
11164 // the codegen worse.
11165 SDValue Load =
11166 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
11167 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
11168 LD->getMemOperand()->getFlags(), LD->getAAInfo());
11169
// NOTE(review): doc-line 11170 (the declaration of 'Vals', presumably a
// SmallVector<SDValue, 8>) is missing here — extraction artifact.
11171 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Big-endian targets store element 0 in the most-significant position.
11172 unsigned ShiftIntoIdx =
11173 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
11174 SDValue ShiftAmount = DAG.getShiftAmountConstant(
11175 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
11176 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
11177 SDValue Elt =
11178 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
11179 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
11180
11181 if (ExtType != ISD::NON_EXTLOAD) {
11182 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
11183 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
11184 }
11185
11186 Vals.push_back(Scalar);
11187 }
11188
11189 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
11190 return std::make_pair(Value, Load.getValue(1));
11191 }
11192
11193 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
11194 assert(SrcEltVT.isByteSized());
11195
// NOTE(review): doc-line 11196 (the declaration of 'Vals') is missing here —
// extraction artifact.
11197 SmallVector<SDValue, 8> LoadChains;
11198
11199 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11200 SDValue ScalarLoad = DAG.getExtLoad(
11201 ExtType, SL, DstEltVT, Chain, BasePTR,
11202 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
11203 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
11204
11205 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
11206
11207 Vals.push_back(ScalarLoad.getValue(0));
11208 LoadChains.push_back(ScalarLoad.getValue(1));
11209 }
11210
// Merge the element-load chains so later users depend on all of them.
11211 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
11212 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
11213
11214 return std::make_pair(Value, NewChain);
11215 }
11216
// Scalarize a fixed-width vector store into per-element truncating stores
// (byte-sized elements) or a single packed integer store built by OR-ing the
// shifted elements together (sub-byte elements).
// NOTE(review): the opening line of this signature (doc-line 11217) is missing
// from this view — extraction artifact; code left byte-identical.
11218 SelectionDAG &DAG) const {
11219 SDLoc SL(ST);
11220
11221 SDValue Chain = ST->getChain();
11222 SDValue BasePtr = ST->getBasePtr();
11223 SDValue Value = ST->getValue();
11224 EVT StVT = ST->getMemoryVT();
11225
11226 if (StVT.isScalableVector())
11227 report_fatal_error("Cannot scalarize scalable vector stores");
11228
11229 // The type of the data we want to save
11230 EVT RegVT = Value.getValueType();
11231 EVT RegSclVT = RegVT.getScalarType();
11232
11233 // The type of data as saved in memory.
11234 EVT MemSclVT = StVT.getScalarType();
11235
11236 unsigned NumElem = StVT.getVectorNumElements();
11237
11238 // A vector must always be stored in memory as-is, i.e. without any padding
11239 // between the elements, since various code depend on it, e.g. in the
11240 // handling of a bitcast of a vector type to int, which may be done with a
11241 // vector store followed by an integer load. A vector that does not have
11242 // elements that are byte-sized must therefore be stored as an integer
11243 // built out of the extracted vector elements.
11244 if (!MemSclVT.isByteSized()) {
11245 unsigned NumBits = StVT.getSizeInBits();
11246 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
11247
11248 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
11249
11250 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11251 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
11252 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
11253 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
// Big-endian targets place element 0 in the most-significant position.
11254 unsigned ShiftIntoIdx =
11255 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
11256 SDValue ShiftAmount =
11257 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
11258 SDValue ShiftedElt =
11259 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
11260 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
11261 }
11262
11263 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
11264 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
11265 ST->getAAInfo());
11266 }
11267
11268 // Store Stride in bytes
11269 unsigned Stride = MemSclVT.getSizeInBits() / 8;
11270 assert(Stride && "Zero stride!");
11271 // Extract each of the elements from the original vector and save them into
11272 // memory individually.
// NOTE(review): doc-line 11273 (the declaration of 'Stores', presumably a
// SmallVector<SDValue, 8>) is missing here — extraction artifact.
11274 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11275 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
11276
11277 SDValue Ptr =
11278 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
11279
11280 // This scalar TruncStore may be illegal, but we legalize it later.
11281 SDValue Store = DAG.getTruncStore(
11282 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
11283 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
11284 ST->getAAInfo());
11285
11286 Stores.push_back(Store);
11287 }
11288
// Combine all element-store chains into one output chain.
11289 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
11290 }
11291
// Expand a load whose alignment the target cannot handle. Three strategies,
// chosen from the visible code below:
//  1. FP/vector value with a legal same-size integer type: do a (misaligned)
//     integer load of the same size and BITCAST back (scalarizing first if
//     even the integer load is not legal/custom).
//  2. Otherwise for FP/vector: bounce through an aligned stack temporary,
//     copied with register-width integer loads/stores, then reload.
//  3. Scalar integer: split into two half-width extending loads and merge
//     them with SHL + OR.
// Returns {loaded value, output chain} (callers expect a MERGE_VALUES pair).
11292std::pair<SDValue, SDValue>
11294  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
11295         "unaligned indexed loads not implemented!");
11296  SDValue Chain = LD->getChain();
11297  SDValue Ptr = LD->getBasePtr();
11298  EVT VT = LD->getValueType(0);
11299  EVT LoadedVT = LD->getMemoryVT();
11300  SDLoc dl(LD);
11301  auto &MF = DAG.getMachineFunction();
11302
11303  if (VT.isFloatingPoint() || VT.isVector()) {
11304    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
11305    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
11306      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
11307          LoadedVT.isVector()) {
11308        // Scalarize the load and let the individual components be handled.
11309        return scalarizeVectorLoad(LD, DAG);
11310      }
11311
11312      // Expand to a (misaligned) integer load of the same size,
11313      // then bitconvert to floating point or vector.
11314      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
11315                                    LD->getMemOperand());
11316      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
11317      if (LoadedVT != VT)
11318        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
11319                             ISD::ANY_EXTEND, dl, VT, Result);
11320
11321      return std::make_pair(Result, newLoad.getValue(1));
11322    }
11323
11324    // Copy the value to a (aligned) stack slot using (unaligned) integer
11325    // loads and stores, then do a (aligned) load from the stack slot.
11326    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
11327    unsigned LoadedBytes = LoadedVT.getStoreSize();
11328    unsigned RegBytes = RegVT.getSizeInBits() / 8;
11329    // Round up: the final (possibly partial) chunk is handled after the loop.
11330    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
11331
11332    // Make sure the stack slot is also aligned for the register type.
11333    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
11334    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
11335    SDValue StackPtr = StackBase;
11336    unsigned Offset = 0;
11337
11338    EVT PtrVT = Ptr.getValueType();
11339    EVT StackPtrVT = StackPtr.getValueType();
11340
11341    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
11342    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
11343
11344    // Do all but one copies using the full register width.
11345    for (unsigned i = 1; i < NumRegs; i++) {
11346      // Load one integer register's worth from the original location.
11347      SDValue Load = DAG.getLoad(
11348          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
11349          LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
11350      // Follow the load with a store to the stack slot. Remember the store.
11351      Stores.push_back(DAG.getStore(
11352          Load.getValue(1), dl, Load, StackPtr,
11353          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
11354      // Increment the pointers.
11355      Offset += RegBytes;
11356
11357      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11358      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11359    }
11360
11361    // The last copy may be partial. Do an extending load.
11362    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
11363                                  8 * (LoadedBytes - Offset));
11364    SDValue Load = DAG.getExtLoad(
11365        ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
11366        LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
11367        LD->getMemOperand()->getFlags(), LD->getAAInfo());
11368    // Follow the load with a store to the stack slot. Remember the store.
11369    // On big-endian machines this requires a truncating store to ensure
11370    // that the bits end up in the right place.
11371    Stores.push_back(DAG.getTruncStore(
11372        Load.getValue(1), dl, Load, StackPtr,
11373        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
11374
11375    // The order of the stores doesn't matter - say it with a TokenFactor.
11376    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11377
11378    // Finally, perform the original load only redirected to the stack slot.
11379    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
11380                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
11381                          LoadedVT);
11382
11383    // Callers expect a MERGE_VALUES node.
11384    return std::make_pair(Load, TF);
11385  }
11386
11387  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
11388         "Unaligned load of unsupported type.");
11389
11390  // Compute the new VT that is half the size of the old one. This is an
11391  // integer MVT.
11392  unsigned NumBits = LoadedVT.getSizeInBits();
11393  EVT NewLoadedVT;
11394  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
11395  NumBits >>= 1;
11396
11397  Align Alignment = LD->getBaseAlign();
11398  unsigned IncrementSize = NumBits / 8;
11399  ISD::LoadExtType HiExtType = LD->getExtensionType();
11400
11401  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
11402  if (HiExtType == ISD::NON_EXTLOAD)
11403    HiExtType = ISD::ZEXTLOAD;
11404
11405  // Load the value in two parts. The Lo half is always zero-extended so the
11406  // OR below merges the halves without interference from stray high bits.
11406  SDValue Lo, Hi;
11407  if (DAG.getDataLayout().isLittleEndian()) {
11408    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
11409                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11410                        LD->getAAInfo());
11411
11412    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11413    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
11414                        LD->getPointerInfo().getWithOffset(IncrementSize),
11415                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11416                        LD->getAAInfo());
11417  } else {
11418    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
11419                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11420                        LD->getAAInfo());
11421
11422    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11423    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
11424                        LD->getPointerInfo().getWithOffset(IncrementSize),
11425                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11426                        LD->getAAInfo());
11427  }
11428
11429  // aggregate the two parts: Result = (Hi << NumBits) | Lo
11430  SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
11431  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
11432  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
11433
11434  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
11435                           Hi.getValue(1));
11436
11437  return std::make_pair(Result, TF);
11438}
11439
// Expand a store whose alignment the target cannot handle. Mirrors
// expandUnalignedLoad: FP/vector values with a legal same-size integer type
// are bitcast and stored as a (misaligned) integer; otherwise they bounce
// through an aligned stack slot copied out with register-width integer
// loads/stores; scalar integers are split into two half-width truncating
// stores. Returns the output chain for the expanded store(s).
11441                                             SelectionDAG &DAG) const {
11442  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
11443         "unaligned indexed stores not implemented!");
11444  SDValue Chain = ST->getChain();
11445  SDValue Ptr = ST->getBasePtr();
11446  SDValue Val = ST->getValue();
11447  EVT VT = Val.getValueType();
11448  Align Alignment = ST->getBaseAlign();
11449  auto &MF = DAG.getMachineFunction();
11450  EVT StoreMemVT = ST->getMemoryVT();
11451
11452  SDLoc dl(ST);
11453  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
11454    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
11455    if (isTypeLegal(intVT)) {
11456      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
11457          StoreMemVT.isVector()) {
11458        // Scalarize the store and let the individual components be handled.
11459        SDValue Result = scalarizeVectorStore(ST, DAG);
11460        return Result;
11461      }
11462      // Expand to a bitconvert of the value to the integer type of the
11463      // same size, then a (misaligned) int store.
11464      // FIXME: Does not handle truncating floating point stores!
11465      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
11466      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
11467                            Alignment, ST->getMemOperand()->getFlags());
11468      return Result;
11469    }
11470    // Do a (aligned) store to a stack slot, then copy from the stack slot
11471    // to the final destination using (unaligned) integer loads and stores.
11472    MVT RegVT = getRegisterType(
11473        *DAG.getContext(),
11474        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
11475    EVT PtrVT = Ptr.getValueType();
11476    unsigned StoredBytes = StoreMemVT.getStoreSize();
11477    unsigned RegBytes = RegVT.getSizeInBits() / 8;
11478    // Round up: the final (possibly partial) chunk is handled after the loop.
11478    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
11479
11480    // Make sure the stack slot is also aligned for the register type.
11481    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
11482    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11483
11484    // Perform the original store, only redirected to the stack slot.
11485    SDValue Store = DAG.getTruncStore(
11486        Chain, dl, Val, StackPtr,
11487        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
11488
11489    EVT StackPtrVT = StackPtr.getValueType();
11490
11491    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
11492    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
11494    unsigned Offset = 0;
11495
11496    // Do all but one copies using the full register width.
11497    for (unsigned i = 1; i < NumRegs; i++) {
11498      // Load one integer register's worth from the stack slot.
11499      SDValue Load = DAG.getLoad(
11500          RegVT, dl, Store, StackPtr,
11501          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
11502      // Store it to the final location. Remember the store.
11503      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
11504                                    ST->getPointerInfo().getWithOffset(Offset),
11505                                    ST->getBaseAlign(),
11506                                    ST->getMemOperand()->getFlags()));
11507      // Increment the pointers.
11508      Offset += RegBytes;
11509      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11510      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11511    }
11512
11513    // The last store may be partial. Do a truncating store. On big-endian
11514    // machines this requires an extending load from the stack slot to ensure
11515    // that the bits are in the right place.
11516    EVT LoadMemVT =
11517        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
11518
11519    // Load from the stack slot.
11520    SDValue Load = DAG.getExtLoad(
11521        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
11522        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
11523
11524    Stores.push_back(DAG.getTruncStore(
11525        Load.getValue(1), dl, Load, Ptr,
11526        ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
11527        ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
11528    // The order of the stores doesn't matter - say it with a TokenFactor.
11529    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11530    return Result;
11531  }
11532
11533  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
11534         "Unaligned store of unknown type.");
11535  // Get the half-size VT
11536  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
11537  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
11538  unsigned IncrementSize = NumBits / 8;
11539
11540  // Divide the stored value in two parts.
11541  SDValue ShiftAmount =
11542      DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
11543  SDValue Lo = Val;
11544  // If Val is a constant, replace the upper bits with 0. The SRL will constant
11545  // fold and not use the upper bits. A smaller constant may be easier to
11546  // materialize.
11547  if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
11548    Lo = DAG.getNode(
11549        ISD::AND, dl, VT, Lo,
11550        DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
11551                        VT));
11552  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
11553
11554  // Store the two parts; endianness decides which half lands at the base
11555  // address and which at base + IncrementSize.
11555  SDValue Store1, Store2;
11556  Store1 = DAG.getTruncStore(Chain, dl,
11557                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
11558                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
11559                             ST->getMemOperand()->getFlags());
11560
11561  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11562  Store2 = DAG.getTruncStore(
11563      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
11564      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
11565      ST->getMemOperand()->getFlags(), ST->getAAInfo());
11566
11567  SDValue Result =
11568      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
11569  return Result;
11570}
11571
// Compute the next memory address after a masked vector access:
// Addr + Increment. For normal accesses Increment is the full store size of
// DataVT. For compressed memory the increment is (popcount of Mask) * element
// size in bytes, i.e. only the active lanes advance the pointer; scalable
// vectors use a zero-extend + VECREDUCE_ADD instead of a scalar CTPOP.
11572SDValue
11574                                       const SDLoc &DL, EVT DataVT,
11575                                       SelectionDAG &DAG,
11576                                       bool IsCompressedMemory) const {
11578  EVT AddrVT = Addr.getValueType();
11579  EVT MaskVT = Mask.getValueType();
11580  assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
11581         "Incompatible types of Data and Mask");
11582  if (IsCompressedMemory) {
11583    // Incrementing the pointer according to number of '1's in the mask.
11584    if (DataVT.isScalableVector()) {
11585      // Count set mask lanes as a vector reduction (no bitcast-to-scalar
11586      // CTPOP is possible for a scalable mask).
11585      EVT MaskExtVT = MaskVT.changeElementType(*DAG.getContext(), MVT::i32);
11586      SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask);
11587      Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt);
11588    } else {
11589      EVT MaskIntVT =
11590          EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
11591      SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
11592      // Widen sub-i32 masks so CTPOP operates on a sensible integer type.
11592      if (MaskIntVT.getSizeInBits() < 32) {
11593        MaskInIntReg =
11594            DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
11595        MaskIntVT = MVT::i32;
11596      }
11597      Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
11598    }
11599    // Scale is an element size in bytes.
11600    SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
11601                                    AddrVT);
11602    Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
11603    Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
11604  } else
11605    Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize());
11606
11607  return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
11608}
11609
// Clamp a dynamic vector index so that a NumSubElts-wide access starting at
// Idx stays within VecVT. Fixed-length vectors clamp to NElts - NumSubElts
// (an AND mask when NElts is a power of two and a single element is
// accessed); scalable vectors clamp against vscale * NElts - NumSubElts,
// using USUBSAT when the subtraction could underflow.
11611                                       EVT VecVT, const SDLoc &dl,
11612                                       ElementCount SubEC) {
11613  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
11614         "Cannot index a scalable vector within a fixed-width vector");
11615
11616  unsigned NElts = VecVT.getVectorMinNumElements();
11617  unsigned NumSubElts = SubEC.getKnownMinValue();
11618  EVT IdxVT = Idx.getValueType();
11619
11620  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
11621    // If this is a constant index and we know the value plus the number of the
11622    // elements in the subvector minus one is less than the minimum number of
11623    // elements then it's safe to return Idx.
11624    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
11625      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
11626        return Idx;
11627    SDValue VS =
11628        DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
11629    // USUBSAT guards against underflow when the subvector may be wider than
11630    // the minimum vector length.
11629    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
11630    SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
11631                              DAG.getConstant(NumSubElts, dl, IdxVT));
11632    return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
11633  }
11634  if (isPowerOf2_32(NElts) && NumSubElts == 1) {
11635    // Idx & (NElts - 1) is cheaper than a UMIN for a single-element access.
11635    APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
11636    return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
11637                       DAG.getConstant(Imm, dl, IdxVT));
11638  }
11639  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
11640  return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
11641                     DAG.getConstant(MaxIndex, dl, IdxVT));
11642}
11643
// Compute the address of element Index within the vector at VecPtr.
// Implemented as the single-element case of getVectorSubVecPointer.
11644SDValue
11646                                          EVT VecVT, SDValue Index,
11647                                          const SDNodeFlags PtrArithFlags) const {
11649      DAG, VecPtr, VecVT,
11651      Index, PtrArithFlags);
11652}
11653
// Compute the address of the subvector (SubVecVT) starting at element Index
// within the vector at VecPtr: the index is widened to pointer width, clamped
// to keep the access in bounds, scaled by vscale for scalable subvectors and
// by the element size in bytes, then added to VecPtr.
11654SDValue
11656                                         EVT VecVT, EVT SubVecVT, SDValue Index,
11657                                         const SDNodeFlags PtrArithFlags) const {
11658  SDLoc dl(Index);
11659  // Make sure the index type is big enough to compute in.
11660  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
11661
11662  EVT EltVT = VecVT.getVectorElementType();
11663
11664  // Calculate the element offset and add it to the pointer.
11665  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
11666  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
11667         "Converting bits to bytes lost precision");
11668  assert(SubVecVT.getVectorElementType() == EltVT &&
11669         "Sub-vector must be a vector with matching element type");
11670  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
11671                                  SubVecVT.getVectorElementCount());
11672
11673  EVT IdxVT = Index.getValueType();
11674  // Scalable subvectors advance by Index * vscale elements.
11674  if (SubVecVT.isScalableVector())
11675    Index =
11676        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11677                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
11678
11679  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11680                      DAG.getConstant(EltSize, dl, IdxVT));
11681  return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
11682}
11683
11684//===----------------------------------------------------------------------===//
11685// Implementation of Emulated TLS Model
11686//===----------------------------------------------------------------------===//
11687
// Lower a TLS global address under the emulated-TLS model: look up the
// "__emutls_v.<name>" control variable in the module and emit a call to
// __emutls_get_address with its address as the single argument. Returns the
// call's result (the TLS variable's address).
11689                                                SelectionDAG &DAG) const {
11690  // Access to address of TLS variable xyz is lowered to a function call:
11691  // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
11692  EVT PtrVT = getPointerTy(DAG.getDataLayout());
11693  PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
11694  SDLoc dl(GA);
11695
11696  ArgListTy Args;
11698  const GlobalValue *GV =
11699  SmallString<32> NameString("__emutls_v.");
11700  NameString += GV->getName();
11701  StringRef EmuTlsVarName(NameString);
11702  // The frontend is expected to have created the __emutls_v.* control
11703  // variable in the same module.
11702  const GlobalVariable *EmuTlsVar =
11703      GV->getParent()->getNamedGlobal(EmuTlsVarName);
11704  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
11705  Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
11706
11707  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
11708
11710  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
11711  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
11712  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
11713
11714  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
11715  // At last for X86 targets, maybe good for other targets too?
11717  MFI.setAdjustsStack(true); // Is this only for X86 target?
11718  MFI.setHasCalls(true);
11719
11720  assert((GA->getOffset() == 0) &&
11721         "Emulated TLS must have zero offset in GlobalAddressSDNode");
11722  return CallResult.first;
11723}
11724
// Lower (setcc X, 0, eq) to (ctlz X) >> log2(bitwidth), which yields 1 iff
// X == 0. Only fires when the target reports CTLZ is fast (isCtlzFast());
// sub-i32 operands are first zero-extended to i32. Returns an empty SDValue
// when the pattern does not apply.
11726                                                SelectionDAG &DAG) const {
11727  assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
11728  if (!isCtlzFast())
11729    return SDValue();
11730  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11731  SDLoc dl(Op);
11732  if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
11733    EVT VT = Op.getOperand(0).getValueType();
11734    SDValue Zext = Op.getOperand(0);
11735    if (VT.bitsLT(MVT::i32)) {
11736      VT = MVT::i32;
11737      Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
11738    }
11739    // ctlz(X) has the top bit (value == bitwidth) set only when X == 0, so
11740    // shifting right by log2(bitwidth) produces exactly 0 or 1.
11739    unsigned Log2b = Log2_32(VT.getSizeInBits());
11740    SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
11741    SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
11742                              DAG.getConstant(Log2b, dl, MVT::i32));
11743    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
11744  }
11745  return SDValue();
11746}
11747
// Expand an integer SMIN/SMAX/UMIN/UMAX node. Tries, in order: flipping
// signedness when both sign bits are known zero; UMAX(x,1) and
// UMIN/UMAX-via-USUBSAT peepholes; unrolling illegal-VSELECT vectors; and
// finally SETCC + SELECT, preferring a condition code for which an existing
// SETCC node can be reused.
11749  SDValue Op0 = Node->getOperand(0);
11750  SDValue Op1 = Node->getOperand(1);
11751  EVT VT = Op0.getValueType();
11752  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11753  unsigned Opcode = Node->getOpcode();
11754  SDLoc DL(Node);
11755
11756  // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
11757  unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(Opcode);
11758  if (isOperationLegal(AltOpcode, VT) && DAG.SignBitIsZero(Op0) &&
11759      DAG.SignBitIsZero(Op1))
11760    return DAG.getNode(AltOpcode, DL, VT, Op0, Op1);
11761
11762  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
11763  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
11765    Op0 = DAG.getFreeze(Op0);
11766    SDValue Zero = DAG.getConstant(0, DL, VT);
11767    return DAG.getNode(ISD::SUB, DL, VT, Op0,
11768                       DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
11769  }
11770
11771  // umin(x,y) -> sub(x,usubsat(x,y))
11772  // TODO: Missing freeze(Op0)?
11773  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
11775    return DAG.getNode(ISD::SUB, DL, VT, Op0,
11776                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
11777  }
11778
11779  // umax(x,y) -> add(x,usubsat(y,x))
11780  // TODO: Missing freeze(Op0)?
11781  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
11783    return DAG.getNode(ISD::ADD, DL, VT, Op0,
11784                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
11785  }
11786
11787  // FIXME: Should really try to split the vector in case it's legal on a
11788  // subvector.
11790    return DAG.UnrollVectorOp(Node);
11791
11792  // Attempt to find an existing SETCC node that we can reuse.
11793  // TODO: Do we need a generic doesSETCCNodeExist?
11794  // TODO: Missing freeze(Op0)/freeze(Op1)?
11795  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
11796                         ISD::CondCode PrefCommuteCC,
11797                         ISD::CondCode AltCommuteCC) {
11798    SDVTList BoolVTList = DAG.getVTList(BoolVT);
11799    // Direct forms: select(Op0 cc Op1, Op0, Op1).
11799    for (ISD::CondCode CC : {PrefCC, AltCC}) {
11800      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11801                            {Op0, Op1, DAG.getCondCode(CC)})) {
11802        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11803        return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11804      }
11805    }
11806    // Commuted forms: same compare, swapped select operands.
11806    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
11807      if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11808                            {Op0, Op1, DAG.getCondCode(CC)})) {
11809        SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11810        return DAG.getSelect(DL, VT, Cond, Op1, Op0);
11811      }
11812    }
11813    // No reusable SETCC: build the preferred form from scratch.
11813    SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
11814    return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11815  };
11816
11817  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
11818  //                     -> Y = (A < B) ? B : A
11819  //                     -> Y = (A >= B) ? A : B
11820  //                     -> Y = (A <= B) ? B : A
11821  switch (Opcode) {
11822  case ISD::SMAX:
11823    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
11824  case ISD::SMIN:
11825    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
11826  case ISD::UMAX:
11827    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
11828  case ISD::UMIN:
11829    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
11830  }
11831
11832  llvm_unreachable("How did we get here?");
11833}
11834
// Expand a saturating add/sub (SADDSAT/UADDSAT/SSUBSAT/USUBSAT). Tries
// UMAX/UMIN-based peepholes for the unsigned cases first, then falls back to
// the corresponding overflow node ([SU]ADDO/[SU]SUBO) plus a select (or
// OR/AND with the sign-extended overflow flag). For the signed cases, known
// operand signs let the saturation constant be picked statically; otherwise
// the saturation value is (SumDiff >> (BW-1)) ^ SIGNED_MIN.
11836  unsigned Opcode = Node->getOpcode();
11837  SDValue LHS = Node->getOperand(0);
11838  SDValue RHS = Node->getOperand(1);
11839  EVT VT = LHS.getValueType();
11840  SDLoc dl(Node);
11841
11842  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11843  assert(VT.isInteger() && "Expected operands to be integers");
11844
11845  // usub.sat(a, b) -> umax(a, b) - b
11846  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
11847    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
11848    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
11849  }
11850
11851  // usub.sat(a, 1) -> sub(a, zext(a != 0))
11852  // Prefer this on targets without legal/cost-effective overflow-carry nodes.
11853  if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS) &&
11855    LHS = DAG.getFreeze(LHS);
11856    SDValue Zero = DAG.getConstant(0, dl, VT);
11857    EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11858    SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
11859    SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
11860    // Normalize the boolean to 0/1 regardless of boolean-contents convention.
11860    Subtrahend =
11861        DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
11862    return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
11863  }
11864
11865  // uadd.sat(a, b) -> umin(a, ~b) + b
11866  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
11867    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
11868    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
11869    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
11870  }
11871
11872  unsigned OverflowOp;
11873  switch (Opcode) {
11874  case ISD::SADDSAT:
11875    OverflowOp = ISD::SADDO;
11876    break;
11877  case ISD::UADDSAT:
11878    OverflowOp = ISD::UADDO;
11879    break;
11880  case ISD::SSUBSAT:
11881    OverflowOp = ISD::SSUBO;
11882    break;
11883  case ISD::USUBSAT:
11884    OverflowOp = ISD::USUBO;
11885    break;
11886  default:
11887    llvm_unreachable("Expected method to receive signed or unsigned saturation "
11888                     "addition or subtraction node.");
11889  }
11890
11891  // FIXME: Should really try to split the vector in case it's legal on a
11892  // subvector.
11894    return DAG.UnrollVectorOp(Node);
11895
11896  unsigned BitWidth = LHS.getScalarValueSizeInBits();
11897  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11898  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11899  SDValue SumDiff = Result.getValue(0);
11900  SDValue Overflow = Result.getValue(1);
11901  SDValue Zero = DAG.getConstant(0, dl, VT);
11902  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
11903
11904  if (Opcode == ISD::UADDSAT) {
11906      // (LHS + RHS) | OverflowMask
11907      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11908      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
11909    }
11910    // Overflow ? 0xffff.... : (LHS + RHS)
11911    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
11912  }
11913
11914  if (Opcode == ISD::USUBSAT) {
11916      // (LHS - RHS) & ~OverflowMask
11917      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11918      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
11919      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
11920    }
11921    // Overflow ? 0 : (LHS - RHS)
11922    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
11923  }
11924
11925  assert((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
11926         "Expected signed saturating add/sub opcode");
11927
11928  const APInt MinVal = APInt::getSignedMinValue(BitWidth);
11929  const APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
11930
11931  KnownBits KnownLHS = DAG.computeKnownBits(LHS);
11932  KnownBits KnownRHS = DAG.computeKnownBits(RHS);
11933
11934  // If either of the operand signs are known, then they are guaranteed to
11935  // only saturate in one direction. If non-negative they will saturate
11936  // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
11937  //
11938  // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
11939  // sign of 'y' has to be flipped.
11940
11941  bool LHSIsNonNegative = KnownLHS.isNonNegative();
11942  bool RHSIsNonNegative =
11943      Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative() : KnownRHS.isNegative();
11944  if (LHSIsNonNegative || RHSIsNonNegative) {
11945    SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11946    return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
11947  }
11948
11949  bool LHSIsNegative = KnownLHS.isNegative();
11950  bool RHSIsNegative =
11951      Opcode == ISD::SADDSAT ? KnownRHS.isNegative() : KnownRHS.isNonNegative();
11952  if (LHSIsNegative || RHSIsNegative) {
11953    SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11954    return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
11955  }
11956
11957  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
11958  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11959  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
11960                              DAG.getConstant(BitWidth - 1, dl, VT));
11961  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
11962  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
11963}
11964
// Expand a three-way comparison (UCMP/SCMP) producing -1/0/+1 in ResVT.
// Builds the two SETCCs (LT and GT, signed or unsigned by opcode), then
// either nests two selects (when select-based expansion is preferred, the
// boolean is i1, or high bits of the boolean are unknown) or computes the
// arithmetic difference of the two boolean results and extends to ResVT.
11966  unsigned Opcode = Node->getOpcode();
11967  SDValue LHS = Node->getOperand(0);
11968  SDValue RHS = Node->getOperand(1);
11969  EVT VT = LHS.getValueType();
11970  EVT ResVT = Node->getValueType(0);
11971  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11972  SDLoc dl(Node);
11973
11974  auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11975  auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
11976  SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
11977  SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
11978
11979  // We can't perform arithmetic on i1 values. Extending them would
11980  // probably result in worse codegen, so let's just use two selects instead.
11981  // Some targets are also just better off using selects rather than subtraction
11982  // because one of the conditions can be merged with one of the selects.
11983  // And finally, if we don't know the contents of high bits of a boolean value
11984  // we can't perform any arithmetic either.
11986      BoolVT.getScalarSizeInBits() == 1 ||
11988    SDValue SelectZeroOrOne =
11989        DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
11990                      DAG.getConstant(0, dl, ResVT));
11991    return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
11992                         SelectZeroOrOne);
11993  }
11994
11996    std::swap(IsGT, IsLT);
11997  // IsGT - IsLT yields +1 / 0 / -1 in BoolVT; sign-extend or truncate into
11998  // the requested result type.
11997  return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
11998                            ResVT);
11999}
12000
// Expand a saturating shift-left (SSHLSAT/USHLSAT). The shift is performed,
// then undone with the matching right shift; if the round-trip does not
// reproduce LHS, the shift overflowed and the result saturates (to
// SIGNED_MIN/SIGNED_MAX by the sign of LHS when signed, to all-ones when
// unsigned).
12002  unsigned Opcode = Node->getOpcode();
12003  bool IsSigned = Opcode == ISD::SSHLSAT;
12004  SDValue LHS = Node->getOperand(0);
12005  SDValue RHS = Node->getOperand(1);
12006  EVT VT = LHS.getValueType();
12007  SDLoc dl(Node);
12008
12009  assert((Node->getOpcode() == ISD::SSHLSAT ||
12010          Node->getOpcode() == ISD::USHLSAT) &&
12011         "Expected a SHLSAT opcode");
12012  assert(VT.isInteger() && "Expected operands to be integers");
12013
12015    return DAG.UnrollVectorOp(Node);
12016
12017  // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
12018
12019  unsigned BW = VT.getScalarSizeInBits();
12020  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12021  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
12022  SDValue Orig =
12023      DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
12024
12025  SDValue SatVal;
12026  if (IsSigned) {
12027    SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
12028    SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
12029    // Negative inputs saturate toward SIGNED_MIN, non-negative toward
12030    // SIGNED_MAX.
12029    SDValue Cond =
12030        DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
12031    SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
12032  } else {
12033    SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
12034  }
12035  SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
12036  return DAG.getSelect(dl, VT, Cond, SatVal, Result);
12037}
12038
12040 bool Signed, SDValue &Lo, SDValue &Hi,
12041 SDValue LHS, SDValue RHS,
12042 SDValue HiLHS, SDValue HiRHS) const {
12043 EVT VT = LHS.getValueType();
12044 assert(RHS.getValueType() == VT && "Mismatching operand types");
12045
12046 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
12047 assert((!Signed || !HiLHS) &&
12048 "Signed flag should only be set when HiLHS and RiRHS are null");
12049
12050 // We'll expand the multiplication by brute force because we have no other
12051 // options. This is a trivially-generalized version of the code from
12052 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
12053 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
12054 // sign bits while calculating the Hi half.
12055 unsigned Bits = VT.getSizeInBits();
12056 unsigned HalfBits = Bits / 2;
12057 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
12058 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
12059 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
12060
12061 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
12062 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
12063
12064 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
12065 // This is always an unsigned shift.
12066 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
12067
12068 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
12069 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
12070 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
12071
12072 SDValue U =
12073 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
12074 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
12075 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
12076
12077 SDValue V =
12078 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
12079 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
12080
12081 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
12082 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
12083
12084 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
12085 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
12086
12087 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
12088 // the products to Hi.
12089 if (HiLHS) {
12090 SDValue RHLL = DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS);
12091 SDValue RLLH = DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS);
12092 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
12093 DAG.getNode(ISD::ADD, dl, VT, RHLL, RLLH));
12094 }
12095}
12096
// Produce the (Lo, Hi) halves of a full multiply of LHS * RHS. Prefers a
// MUL_I16/I32/I64/I128 libcall on the double-width type when one is
// supported (passing sign/zero-extended high halves, argument order chosen
// by endianness); otherwise falls back to forceExpandMultiply.
12098                                       bool Signed, const SDValue LHS,
12099                                       const SDValue RHS, SDValue &Lo,
12100                                       SDValue &Hi) const {
12101  EVT VT = LHS.getValueType();
12102  assert(RHS.getValueType() == VT && "Mismatching operand types");
12103  EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12104  // We can fall back to a libcall with an illegal type for the MUL if we
12105  // have a libcall big enough.
12106  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
12107  if (WideVT == MVT::i16)
12108    LC = RTLIB::MUL_I16;
12109  else if (WideVT == MVT::i32)
12110    LC = RTLIB::MUL_I32;
12111  else if (WideVT == MVT::i64)
12112    LC = RTLIB::MUL_I64;
12113  else if (WideVT == MVT::i128)
12114    LC = RTLIB::MUL_I128;
12115
12116  RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
12117  if (LibcallImpl == RTLIB::Unsupported) {
12118    // No suitable libcall: expand by brute force instead.
12118    forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
12119    return;
12120  }
12121
12122  SDValue HiLHS, HiRHS;
12123  if (Signed) {
12124    // The high part is obtained by SRA'ing all but one of the bits of low
12125    // part.
12126    unsigned LoSize = VT.getFixedSizeInBits();
12127    SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
12128    HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
12129    HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
12130  } else {
12131    HiLHS = DAG.getConstant(0, dl, VT);
12132    HiRHS = DAG.getConstant(0, dl, VT);
12133  }
12134
12135  // Attempt a libcall.
12136  SDValue Ret;
12138  CallOptions.setIsSigned(Signed);
12139  CallOptions.setIsPostTypeLegalization(true);
12141    // Halves of WideVT are packed into registers in different order
12142    // depending on platform endianness. This is usually handled by
12143    // the C calling convention, but we can't defer to it in
12144    // the legalizer.
12145    SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
12146    Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
12147  } else {
12148    SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
12149    Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
12150  }
12152         "Ret value is a collection of constituent nodes holding result.");
12153  if (DAG.getDataLayout().isLittleEndian()) {
12154    // Same as above.
12155    Lo = Ret.getOperand(0);
12156    Hi = Ret.getOperand(1);
12157  } else {
12158    Lo = Ret.getOperand(1);
12159    Hi = Ret.getOperand(0);
12160  }
12161}
12162
// Expand [US]MULFIX / [US]MULFIXSAT (fixed-point multiply with scale operand,
// optionally saturating) into ordinary DAG nodes: compute the double-width
// product as Lo/Hi halves, funnel-shift out the scale, then clamp for the
// saturating forms. Returns an empty SDValue only for vector types that
// would otherwise require the scalar wide-mul fallback.
// NOTE(review): the line carrying the function name is elided in this view;
// this appears to be TargetLowering::expandFixedPointMul(SDNode*,
// SelectionDAG&) const -- confirm against the full file.
12163SDValue
12165 assert((Node->getOpcode() == ISD::SMULFIX ||
12166 Node->getOpcode() == ISD::UMULFIX ||
12167 Node->getOpcode() == ISD::SMULFIXSAT ||
12168 Node->getOpcode() == ISD::UMULFIXSAT) &&
12169 "Expected a fixed point multiplication opcode");
12170
12171 SDLoc dl(Node);
12172 SDValue LHS = Node->getOperand(0);
12173 SDValue RHS = Node->getOperand(1);
12174 EVT VT = LHS.getValueType();
// Operand 2 is the compile-time scale (number of fractional bits).
12175 unsigned Scale = Node->getConstantOperandVal(2);
12176 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
12177 Node->getOpcode() == ISD::UMULFIXSAT);
12178 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
12179 Node->getOpcode() == ISD::SMULFIXSAT);
12180 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12181 unsigned VTSize = VT.getScalarSizeInBits();
12182
// Scale of zero degenerates to a plain integer multiply, with the
// saturating forms lowered through [SU]MULO overflow detection when legal.
12183 if (!Scale) {
12184 // [us]mul.fix(a, b, 0) -> mul(a, b)
12185 if (!Saturating) {
// NOTE(review): one line is elided between this `if` and the `return` in
// this view.
12187 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12188 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
12189 SDValue Result =
12190 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12191 SDValue Product = Result.getValue(0);
12192 SDValue Overflow = Result.getValue(1);
12193 SDValue Zero = DAG.getConstant(0, dl, VT);
12194
12195 APInt MinVal = APInt::getSignedMinValue(VTSize);
12196 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
12197 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
12198 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12199 // Xor the inputs, if resulting sign bit is 0 the product will be
12200 // positive, else negative.
12201 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12202 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
// On overflow pick SatMin/SatMax by the expected product sign; otherwise
// keep the exact product.
12203 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
12204 return DAG.getSelect(dl, VT, Overflow, Result, Product);
12205 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
12206 SDValue Result =
12207 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12208 SDValue Product = Result.getValue(0);
12209 SDValue Overflow = Result.getValue(1);
12210
12211 APInt MaxVal = APInt::getMaxValue(VTSize);
12212 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12213 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
12214 }
12215 }
12216
12217 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
12218 "Expected scale to be less than the number of bits if signed or at "
12219 "most the number of bits if unsigned.");
12220 assert(LHS.getValueType() == RHS.getValueType() &&
12221 "Expected both operands to be the same type");
12222
12223 // Get the upper and lower bits of the result.
// Preference order: a fused MUL_LOHI, then MUL + MULH, then a multiply in
// the double-width type, and finally the scalar libcall/inline fallback.
12224 SDValue Lo, Hi;
12225 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
12226 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
12227 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12228 if (isOperationLegalOrCustom(LoHiOp, VT)) {
12229 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
12230 Lo = Result.getValue(0);
12231 Hi = Result.getValue(1);
12232 } else if (isOperationLegalOrCustom(HiOp, VT)) {
12233 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12234 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
12235 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
12236 // Try for a multiplication using a wider type.
12237 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12238 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
12239 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
12240 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
12241 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
12242 SDValue Shifted =
12243 DAG.getNode(ISD::SRA, dl, WideVT, Res,
12244 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
12245 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
12246 } else if (VT.isVector()) {
// Vector fallback is not implemented here; signal the caller to handle it.
12247 return SDValue();
12248 } else {
12249 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
12250 }
12251
12252 if (Scale == VTSize)
12253 // Result is just the top half since we'd be shifting by the width of the
12254 // operand. Overflow impossible so this works for both UMULFIX and
12255 // UMULFIXSAT.
12256 return Hi;
12257
12258 // The result will need to be shifted right by the scale since both operands
12259 // are scaled. The result is given to us in 2 halves, so we only want part of
12260 // both in the result.
12261 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
12262 DAG.getShiftAmountConstant(Scale, VT, dl));
12263 if (!Saturating)
12264 return Result;
12265
12266 if (!Signed) {
12267 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
12268 // widened multiplication) aren't all zeroes.
12269
12270 // Saturate to max if ((Hi >> Scale) != 0),
12271 // which is the same as if (Hi > ((1 << Scale) - 1))
12272 APInt MaxVal = APInt::getMaxValue(VTSize);
12273 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
12274 dl, VT);
12275 Result = DAG.getSelectCC(dl, Hi, LowMask,
12276 DAG.getConstant(MaxVal, dl, VT), Result,
12277 ISD::SETUGT);
12278
12279 return Result;
12280 }
12281
12282 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
12283 // widened multiplication) aren't all ones or all zeroes.
12284
12285 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
12286 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
12287
12288 if (Scale == 0) {
// With no fractional bits, overflow is detected by comparing Hi against the
// sign-replicated Lo: any mismatch means the product does not fit in VT.
12289 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
12290 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
12291 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
12292 // Saturated to SatMin if wide product is negative, and SatMax if wide
12293 // product is positive ...
12294 SDValue Zero = DAG.getConstant(0, dl, VT);
12295 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
12296 ISD::SETLT);
12297 // ... but only if we overflowed.
12298 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
12299 }
12300
12301 // We handled Scale==0 above so all the bits to examine is in Hi.
12302
12303 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
12304 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
12305 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
12306 dl, VT);
12307 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
12308 // Saturate to min if (Hi >> (Scale - 1)) < -1),
12309 // which is the same as if (HI < (-1 << (Scale - 1))
12310 SDValue HighMask =
12311 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
12312 dl, VT);
12313 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
12314 return Result;
12315}
12316
// Expand [US]DIVFIX / [US]DIVFIXSAT (fixed-point divide with Scale fractional
// bits) into a plain integer division, when the operands have enough headroom
// to pre-shift LHS up / RHS down by a total of Scale bits. Returns the
// rounded-toward-negative-infinity quotient, or an empty SDValue when the
// headroom is insufficient and the caller must widen instead.
// NOTE(review): the line carrying the function name is elided in this view;
// this appears to be TargetLowering::expandFixedPointDiv(unsigned Opcode,
// const SDLoc &dl, ...) const -- confirm against the full file.
12317SDValue
12319 SDValue LHS, SDValue RHS,
12320 unsigned Scale, SelectionDAG &DAG) const {
12321 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
12322 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
12323 "Expected a fixed point division opcode");
12324
12325 EVT VT = LHS.getValueType();
12326 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
12327 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
12328 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12329
12330 // If there is enough room in the type to upscale the LHS or downscale the
12331 // RHS before the division, we can perform it in this type without having to
12332 // resize. For signed operations, the LHS headroom is the number of
12333 // redundant sign bits, and for unsigned ones it is the number of zeroes.
12334 // The headroom for the RHS is the number of trailing zeroes.
// NOTE(review): the unsigned arm of this conditional expression (presumably
// counting leading zeros of LHS) is on a line elided from this view.
12335 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
12337 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
12338
12339 // For signed saturating operations, we need to be able to detect true integer
12340 // division overflow; that is, when you have MIN / -EPS. However, this
12341 // is undefined behavior and if we emit divisions that could take such
12342 // values it may cause undesired behavior (arithmetic exceptions on x86, for
12343 // example).
12344 // Avoid this by requiring an extra bit so that we never get this case.
12345 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
12346 // signed saturating division, we need to emit a whopping 32-bit division.
12347 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
12348 return SDValue();
12349
// Spend as much of the scale as possible on the LHS up-shift; the remainder
// comes out of the RHS down-shift (which is exact thanks to RHSTrail).
12350 unsigned LHSShift = std::min(LHSLead, Scale);
12351 unsigned RHSShift = Scale - LHSShift;
12352
12353 // At this point, we know that if we shift the LHS up by LHSShift and the
12354 // RHS down by RHSShift, we can emit a regular division with a final scaling
12355 // factor of Scale.
12356
12357 if (LHSShift)
12358 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
12359 DAG.getShiftAmountConstant(LHSShift, VT, dl));
12360 if (RHSShift)
12361 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
12362 DAG.getShiftAmountConstant(RHSShift, VT, dl));
12363
12364 SDValue Quot;
12365 if (Signed) {
12366 // For signed operations, if the resulting quotient is negative and the
12367 // remainder is nonzero, subtract 1 from the quotient to round towards
12368 // negative infinity.
12369 SDValue Rem;
12370 // FIXME: Ideally we would always produce an SDIVREM here, but if the
12371 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
12372 // we couldn't just form a libcall, but the type legalizer doesn't do it.
// NOTE(review): the second half of this condition (presumably a legality
// check for ISD::SDIVREM) is on a line elided from this view.
12373 if (isTypeLegal(VT) &&
12375 Quot = DAG.getNode(ISD::SDIVREM, dl,
12376 DAG.getVTList(VT, VT),
12377 LHS, RHS);
12378 Rem = Quot.getValue(1);
12379 Quot = Quot.getValue(0);
12380 } else {
12381 Quot = DAG.getNode(ISD::SDIV, dl, VT,
12382 LHS, RHS);
12383 Rem = DAG.getNode(ISD::SREM, dl, VT,
12384 LHS, RHS);
12385 }
12386 SDValue Zero = DAG.getConstant(0, dl, VT);
12387 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
12388 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
12389 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
// Quotient is negative exactly when the operand signs differ.
12390 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
12391 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
12392 DAG.getConstant(1, dl, VT));
12393 Quot = DAG.getSelect(dl, VT,
12394 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
12395 Sub1, Quot);
12396 } else
12397 Quot = DAG.getNode(ISD::UDIV, dl, VT,
12398 LHS, RHS);
12399
12400 return Quot;
12401}
12402
// Expand UADDO/USUBO (unsigned add/sub with overflow flag) into an
// ADD/SUB plus a SETCC-based overflow computation, preferring a legal
// UADDO_CARRY/USUBO_CARRY with a zero carry-in when available.
// NOTE(review): the opening line of this definition (return type and
// function name) is elided in this view; this appears to be
// TargetLowering::expandUADDSUBO -- confirm against the full file.
12404 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
12405 SDLoc dl(Node);
12406 SDValue LHS = Node->getOperand(0);
12407 SDValue RHS = Node->getOperand(1);
12408 bool IsAdd = Node->getOpcode() == ISD::UADDO;
12409
12410 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
12411 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
12412 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
12413 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
12414 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
12415 { LHS, RHS, CarryIn });
12416 Result = SDValue(NodeCarry.getNode(), 0);
12417 Overflow = SDValue(NodeCarry.getNode(), 1);
12418 return;
12419 }
12420
12421 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
12422 LHS.getValueType(), LHS, RHS);
12423
12424 EVT ResultType = Node->getValueType(1);
12425 EVT SetCCType = getSetCCResultType(
12426 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
12427 SDValue SetCC;
12428 if (IsAdd && isOneConstant(RHS)) {
12429 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
12430 // the live range of X. We assume comparing with 0 is cheap.
12431 // The general case (X + C) < C is not necessarily beneficial. Although we
12432 // reduce the live range of X, we may introduce the materialization of
12433 // constant C.
12434 SetCC =
12435 DAG.getSetCC(dl, SetCCType, Result,
12436 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
12437 } else if (IsAdd && isAllOnesConstant(RHS)) {
12438 // Special case: uaddo X, -1 overflows if X != 0.
12439 SetCC =
12440 DAG.getSetCC(dl, SetCCType, LHS,
12441 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
12442 } else {
// General case: unsigned add overflows iff Result < LHS; unsigned sub
// overflows (borrows) iff Result > LHS.
12443 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
12444 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
12445 }
// Normalize the SetCC value to the node's declared overflow result type.
12446 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
12447}
12448
// Expand SADDO/SSUBO (signed add/sub with overflow flag) into an ADD/SUB
// plus sign-based overflow detection, using a legal SADDSAT/SSUBSAT
// comparison when available.
// NOTE(review): the opening line of this definition (return type and
// function name) is elided in this view; this appears to be
// TargetLowering::expandSADDSUBO -- confirm against the full file.
12450 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
12451 SDLoc dl(Node);
12452 SDValue LHS = Node->getOperand(0);
12453 SDValue RHS = Node->getOperand(1);
12454 bool IsAdd = Node->getOpcode() == ISD::SADDO;
12455
12456 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
12457 LHS.getValueType(), LHS, RHS);
12458
12459 EVT ResultType = Node->getValueType(1);
12460 EVT OType = getSetCCResultType(
12461 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
12462
12463 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
// The wrapped result differs from the saturated result exactly when the
// operation overflowed.
12464 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
12465 if (isOperationLegal(OpcSat, LHS.getValueType())) {
12466 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
12467 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
12468 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
12469 return;
12470 }
12471
12472 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
12473
12474 if (IsAdd) {
12475 // For an addition, the result should be less than one of the operands (LHS)
12476 // if and only if the other operand (RHS) is negative, otherwise there will
12477 // be overflow.
12478 SDValue ResultLowerThanLHS =
12479 DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
12480 SDValue RHSNegative = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETLT);
12481 Overflow = DAG.getBoolExtOrTrunc(
12482 DAG.getNode(ISD::XOR, dl, OType, RHSNegative, ResultLowerThanLHS), dl,
12483 ResultType, ResultType);
12484 } else {
12485 // For subtraction, overflow occurs when the signed comparison of operands
12486 // doesn't match the sign of the result.
12487 SDValue LHSLessThanRHS = DAG.getSetCC(dl, OType, LHS, RHS, ISD::SETLT);
12488 SDValue ResultNegative = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETLT);
12489 Overflow = DAG.getBoolExtOrTrunc(
12490 DAG.getNode(ISD::XOR, dl, OType, LHSLessThanRHS, ResultNegative), dl,
12491 ResultType, ResultType);
12492 }
12493}
12494
// Expand SMULO/UMULO (multiply with overflow flag). Produces the low-half
// product in Result and an overflow SetCC in Overflow, choosing among a
// power-of-two shift expansion, MUL_LOHI, MULH, a double-width multiply, or
// the scalar wide-mul fallback. Returns false only for vector types that
// reach the fallback (which cannot handle vectors).
// NOTE(review): the opening line of this definition (return type, function
// name and the leading Node/Result parameters) is elided in this view; this
// appears to be TargetLowering::expandMULO -- confirm against the full file.
12496 SDValue &Overflow, SelectionDAG &DAG) const {
12497 SDLoc dl(Node);
12498 EVT VT = Node->getValueType(0);
12499 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12500 SDValue LHS = Node->getOperand(0);
12501 SDValue RHS = Node->getOperand(1);
12502 bool isSigned = Node->getOpcode() == ISD::SMULO;
12503
12504 // For power-of-two multiplications we can use a simpler shift expansion.
12505 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
12506 const APInt &C = RHSC->getAPIntValue();
12507 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
12508 if (C.isPowerOf2()) {
12509 // smulo(x, signed_min) is same as umulo(x, signed_min).
12510 bool UseArithShift = isSigned && !C.isMinSignedValue();
12511 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
12512 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
// Overflow iff shifting back down does not recover the original value.
12513 Overflow = DAG.getSetCC(dl, SetCCVT,
12514 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
12515 dl, VT, Result, ShiftAmt),
12516 LHS, ISD::SETNE);
12517 return true;
12518 }
12519 }
12520
12521 SDValue BottomHalf;
12522 SDValue TopHalf;
12523 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12524
// Opcode table indexed by signedness: [0] = MUL_LOHI variant, [1] = MULH
// variant, [2] = extension kind for the widened multiply.
// NOTE(review): the initializer of this array (presumably the
// unsigned/signed opcode rows) is on lines elided from this view.
12525 static const unsigned Ops[2][3] =
12528 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
12529 BottomHalf = DAG.getNode(Ops[isSigned][0], dl, DAG.getVTList(VT, VT), LHS,
12530 RHS);
12531 TopHalf = BottomHalf.getValue(1);
12532 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
12533 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12534 TopHalf = DAG.getNode(Ops[isSigned][1], dl, VT, LHS, RHS);
12535 } else if (isTypeLegal(WideVT)) {
12536 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
12537 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
12538 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
12539 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
12540 SDValue ShiftAmt =
12541 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
12542 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
12543 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
12544 } else {
12545 if (VT.isVector())
12546 return false;
12547
12548 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
12549 }
12550
12551 Result = BottomHalf;
12552 if (isSigned) {
// Signed overflow iff the top half is not the sign-replication of the
// bottom half.
12553 SDValue ShiftAmt = DAG.getShiftAmountConstant(
12554 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
12555 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
12556 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
12557 } else {
// Unsigned overflow iff any bit landed in the top half.
12558 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
12559 DAG.getConstant(0, dl, VT), ISD::SETNE);
12560 }
12561
12562 // Truncate the result if SetCC returns a larger type than needed.
12563 EVT RType = Node->getValueType(1);
12564 if (RType.bitsLT(Overflow.getValueType()))
12565 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
12566
12567 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
12568 "Unexpected result type for S/UMULO legalization");
12569 return true;
12570}
12571
// Expand a VECREDUCE_* node into scalar code: repeatedly split power-of-two
// vectors in half and combine halves with the base (element-wise) opcode,
// then finish with a linear chain over the extracted elements.
// NOTE(review): the opening line of this definition (return type and
// function name) is elided in this view; this appears to be
// TargetLowering::expandVecReduce(SDNode*, SelectionDAG&) const -- confirm
// against the full file.
12573 SDLoc dl(Node);
12574 ISD::NodeType BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12575 SDValue Op = Node->getOperand(0);
12576 SDNodeFlags Flags = Node->getFlags();
12577 EVT VT = Op.getValueType();
12578
12579 // Try to use a shuffle reduction for power of two vectors.
12580 if (VT.isPow2VectorType()) {
12581 // See if the reduction opcode is safe to use with widened types.
// WidenSrc marks opcodes whose result is unaffected by extra (poison)
// elements introduced by widening.
12582 bool WidenSrc = false;
12583 switch (Node->getOpcode()) {
// NOTE(review): lines between the switch header and the first visible case
// label (presumably the default arm) are elided from this view.
12586 case ISD::VECREDUCE_ADD:
12587 case ISD::VECREDUCE_MUL:
12588 case ISD::VECREDUCE_AND:
12589 case ISD::VECREDUCE_OR:
12590 case ISD::VECREDUCE_XOR:
// NOTE(review): additional case labels between XOR and this statement are
// elided from this view.
12595 WidenSrc = VT.isFixedLengthVector();
12596 break;
12597 }
12598
// NOTE(review): the loop header driving this repeated halving (the line
// owning the `continue`/`break` below) is elided from this view.
12600 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
12601 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT)) {
12602 if (WidenSrc && Op.getOpcode() != ISD::BUILD_VECTOR) {
12603 // Attempt to widen the source vectors to a legal op.
12604 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), HalfVT);
12605 if (WideVT.isVector() &&
12606 WideVT.getScalarType() == HalfVT.getScalarType() &&
12607 WideVT.getVectorNumElements() >= HalfVT.getVectorNumElements() &&
12608 isOperationLegalOrCustom(BaseOpcode, WideVT)) {
12609 SDValue Lo, Hi;
12610 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
// Pad each half into the wider type with poison lanes, combine, then
// extract the meaningful low subvector again.
12611 Lo = DAG.getInsertSubvector(dl, DAG.getPOISON(WideVT), Lo, 0);
12612 Hi = DAG.getInsertSubvector(dl, DAG.getPOISON(WideVT), Hi, 0);
12613 Op = DAG.getNode(BaseOpcode, dl, WideVT, Lo, Hi, Flags);
12614 Op = DAG.getExtractSubvector(dl, HalfVT, Op, 0);
12615 VT = HalfVT;
12616 continue;
12617 }
12618 }
12619 break;
12620 }
12621
12622 SDValue Lo, Hi;
12623 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
12624 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Flags);
12625 VT = HalfVT;
12626
12627 // Stop if splitting is enough to make the reduction legal.
12628 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
12629 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
12630 Flags);
12631 }
12632 }
12633
// Scalarization below requires a known (fixed) element count.
12634 if (VT.isScalableVector())
// NOTE(review): the call wrapping this message (presumably
// report_fatal_error) is on a line elided from this view.
12636 "Expanding reductions for scalable vectors is undefined.");
12637
12638 EVT EltVT = VT.getVectorElementType();
12639 unsigned NumElts = VT.getVectorNumElements();
12640
// NOTE(review): the declaration of Ops (presumably a SmallVector<SDValue>)
// is on a line elided from this view.
12642 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
12643
// Fold the extracted elements left-to-right with the base opcode.
12644 SDValue Res = Ops[0];
12645 for (unsigned i = 1; i < NumElts; i++)
12646 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
12647
12648 // Result type may be wider than element type.
12649 if (EltVT != Node->getValueType(0))
12650 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
12651 return Res;
12652}
12653
// Expand a sequential (ordered) VECREDUCE_SEQ_* node: start from the scalar
// accumulator operand and fold in every vector element in order with the
// base opcode. Ordering is preserved, so this is valid for strict FP
// reductions.
// NOTE(review): the opening line of this definition (return type and
// function name) is elided in this view; this appears to be
// TargetLowering::expandVecReduceSeq -- confirm against the full file.
12655 SDLoc dl(Node);
// Operand 0 is the initial scalar accumulator, operand 1 the vector.
12656 SDValue AccOp = Node->getOperand(0);
12657 SDValue VecOp = Node->getOperand(1);
12658 SDNodeFlags Flags = Node->getFlags();
12659
12660 EVT VT = VecOp.getValueType();
12661 EVT EltVT = VT.getVectorElementType();
12662
// Scalarization requires a known (fixed) element count.
12663 if (VT.isScalableVector())
// NOTE(review): the call wrapping this message (presumably
// report_fatal_error) is on a line elided from this view.
12665 "Expanding reductions for scalable vectors is undefined.");
12666
12667 unsigned NumElts = VT.getVectorNumElements();
12668
// NOTE(review): the declaration of Ops (presumably a SmallVector<SDValue>)
// is on a line elided from this view.
12670 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
12671
12672 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12673
12674 SDValue Res = AccOp;
12675 for (unsigned i = 0; i < NumElts; i++)
12676 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
12677
12678 return Res;
12679}
12680
// Expand SREM/UREM into either a DIVREM node (taking its remainder result)
// or, if only plain division is legal, the identity X % Y == X - (X/Y)*Y.
// Returns false when neither form is legal or custom for VT.
// NOTE(review): the opening line of this definition (return type, function
// name and the Node/Result parameters) is elided in this view; this appears
// to be TargetLowering::expandREM -- confirm against the full file.
12682 SelectionDAG &DAG) const {
12683 EVT VT = Node->getValueType(0);
12684 SDLoc dl(Node);
12685 bool isSigned = Node->getOpcode() == ISD::SREM;
12686 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
12687 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
12688 SDValue Dividend = Node->getOperand(0);
12689 SDValue Divisor = Node->getOperand(1);
// Best case: a combined div+rem node; the remainder is result value 1.
12690 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
12691 SDVTList VTs = DAG.getVTList(VT, VT);
12692 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
12693 return true;
12694 }
12695 if (isOperationLegalOrCustom(DivOpc, VT)) {
12696 // X % Y -> X-X/Y*Y
12697 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
12698 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
12699 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
12700 return true;
12701 }
12702 return false;
12703}
12704
// Expand FP_TO_SINT_SAT / FP_TO_UINT_SAT: convert Src to an integer,
// saturating to the SatVT min/max on out-of-range inputs and producing 0 for
// NaN. Uses an fmin/fmax clamp when the integer bounds are exactly
// representable in the source FP type, otherwise a compare-and-select chain.
// NOTE(review): the opening line of this definition (return type, function
// name and the Node parameter) is elided in this view; this appears to be
// TargetLowering::expandFP_TO_INT_SAT -- confirm against the full file.
12706 SelectionDAG &DAG) const {
12707 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
12708 SDLoc dl(SDValue(Node, 0));
12709 SDValue Src = Node->getOperand(0);
12710
12711 // DstVT is the result type, while SatVT is the size to which we saturate
12712 EVT SrcVT = Src.getValueType();
12713 EVT DstVT = Node->getValueType(0);
12714
12715 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
12716 unsigned SatWidth = SatVT.getScalarSizeInBits();
12717 unsigned DstWidth = DstVT.getScalarSizeInBits();
12718 assert(SatWidth <= DstWidth &&
12719 "Expected saturation width smaller than result width");
12720
12721 // Determine minimum and maximum integer values and their corresponding
12722 // floating-point values.
12723 APInt MinInt, MaxInt;
12724 if (IsSigned) {
12725 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
12726 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
12727 } else {
12728 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
12729 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
12730 }
12731
12732 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
12733 // libcall emission cannot handle this. Large result types will fail.
12734 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
12735 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
12736 SrcVT = Src.getValueType();
12737 }
12738
// Convert the integer bounds to FP, tracking whether the conversion was
// exact (rounding toward zero keeps the bound conservative).
12739 const fltSemantics &Sem = SrcVT.getFltSemantics();
12740 APFloat MinFloat(Sem);
12741 APFloat MaxFloat(Sem);
12742
12743 APFloat::opStatus MinStatus =
12744 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
12745 APFloat::opStatus MaxStatus =
12746 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
12747 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
12748 !(MaxStatus & APFloat::opStatus::opInexact);
12749
12750 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
12751 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
12752
12753 // If the integer bounds are exactly representable as floats and min/max are
12754 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
12755 // of comparisons and selects.
12756 auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
12757 bool MayPropagateNaN) {
12758 bool MinMaxLegal = isOperationLegalOrCustom(MinOpcode, SrcVT) &&
12759 isOperationLegalOrCustom(MaxOpcode, SrcVT);
12760 if (!MinMaxLegal)
12761 return SDValue();
12762
12763 SDValue Clamped = Src;
12764
12765 // Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
12766 // then the result is MinFloat.
12767 Clamped = DAG.getNode(MaxOpcode, dl, SrcVT, Clamped, MinFloatNode);
12768 // Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
12769 Clamped = DAG.getNode(MinOpcode, dl, SrcVT, Clamped, MaxFloatNode);
12770 // Convert clamped value to integer.
12771 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
12772 dl, DstVT, Clamped);
12773
12774 // If !MayPropagateNan and the conversion is unsigned case we're done,
12775 // because we mapped NaN to MinFloat, which will cast to zero.
12776 if (!MayPropagateNaN && !IsSigned)
12777 return FpToInt;
12778
12779 // Otherwise, select 0 if Src is NaN.
12780 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12781 EVT SetCCVT =
12782 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
// Src != Src (unordered-equal check) is the canonical NaN test.
12783 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12784 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
12785 };
// Try the min/max variants from most NaN-friendly to least.
12786 if (AreExactFloatBounds) {
12787 if (SDValue Res = EmitMinMax(ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
12788 /*MayPropagateNaN=*/false))
12789 return Res;
12790 // These may propagate NaN for sNaN operands.
12791 if (SDValue Res =
12792 EmitMinMax(ISD::FMINNUM, ISD::FMAXNUM, /*MayPropagateNaN=*/true))
12793 return Res;
12794 // These always propagate NaN.
12795 if (SDValue Res =
12796 EmitMinMax(ISD::FMINIMUM, ISD::FMAXIMUM, /*MayPropagateNaN=*/true))
12797 return Res;
12798 }
12799
// Fallback: unconstrained conversion plus compare/select clamping.
12800 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
12801 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
12802
12803 // Result of direct conversion. The assumption here is that the operation is
12804 // non-trapping and it's fine to apply it to an out-of-range value if we
12805 // select it away later.
12806 SDValue FpToInt =
12807 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
12808
12809 SDValue Select = FpToInt;
12810
12811 EVT SetCCVT =
12812 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
12813
12814 // If Src ULT MinFloat, select MinInt. In particular, this also selects
12815 // MinInt if Src is NaN.
12816 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
12817 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
12818 // If Src OGT MaxFloat, select MaxInt.
12819 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
12820 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
12821
12822 // In the unsigned case we are done, because we mapped NaN to MinInt, which
12823 // is already zero.
12824 if (!IsSigned)
12825 return Select;
12826
12827 // Otherwise, select 0 if Src is NaN.
12828 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12829 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12830 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
12831}
12832
// Narrow Op from OperandVT to ResultVT using round-to-odd: if the narrowed
// value is inexact and even, nudge it toward the correctly-rounded odd
// neighbour. This prevents double-rounding errors in a subsequent narrowing
// step (see the Boldo/Melquiond reference cited below).
// NOTE(review): the opening line of this definition (return type, function
// name and the ResultVT/Op parameters) is elided in this view; the call site
// at expandFP_ROUND invokes it as expandRoundInexactToOdd(F32, Op, dl, DAG)
// -- confirm against the full file.
12834 const SDLoc &dl,
12835 SelectionDAG &DAG) const {
12836 EVT OperandVT = Op.getValueType();
// Same scalar type: nothing to round.
12837 if (OperandVT.getScalarType() == ResultVT.getScalarType())
12838 return Op;
12839 EVT ResultIntVT = ResultVT.changeTypeToInteger();
12840 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12841 // can induce double-rounding which may alter the results. We can
12842 // correct for this using a trick explained in: Boldo, Sylvie, and
12843 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12844 // World Congress. 2005.
12845 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
// Re-extend so the rounded value can be compared against the original in
// the wide type.
12846 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
12847
12848 // We can keep the narrow value as-is if narrowing was exact (no
12849 // rounding error), the wide value was NaN (the narrow value is also
12850 // NaN and should be preserved) or if we rounded to the odd value.
12851 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
12852 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
12853 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
// Low mantissa bit of the narrowed value decides "odd".
12854 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
12855 EVT ResultIntVTCCVT = getSetCCResultType(
12856 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
12857 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
12858 // The result is already odd so we don't need to do anything.
12859 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
12860
12861 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12862 Op.getValueType());
12863 // We keep results which are exact, odd or NaN.
// SETUEQ is true for equal-or-unordered, covering both the exact and the
// NaN cases in one compare.
12864 SDValue KeepNarrow =
12865 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
12866 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
12867 // We morally performed a round-down if AbsNarrow is smaller than
12868 // AbsWide.
12869 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
12870 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
12871 SDValue NarrowIsRd =
12872 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
12873 // If the narrow value is odd or exact, pick it.
12874 // Otherwise, narrow is even and corresponds to either the rounded-up
12875 // or rounded-down value. If narrow is the rounded-down value, we want
12876 // the rounded-up value as it will be odd.
// Adjusting the integer bit pattern by +/-1 moves to the neighbouring
// representable float in the direction of the rounding error.
12877 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
12878 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
12879 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
12880 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
12881}
12882
12884 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
12885 SDValue Op = Node->getOperand(0);
12886 EVT VT = Node->getValueType(0);
12887 SDLoc dl(Node);
12888 if (VT.getScalarType() == MVT::bf16) {
12889 if (Node->getConstantOperandVal(1) == 1) {
12890 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
12891 }
12892 EVT OperandVT = Op.getValueType();
12893 SDValue IsNaN = DAG.getSetCC(
12894 dl,
12895 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
12896 Op, Op, ISD::SETUO);
12897
12898 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12899 // can induce double-rounding which may alter the results. We can
12900 // correct for this using a trick explained in: Boldo, Sylvie, and
12901 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12902 // World Congress. 2005.
12903 EVT F32 = VT.changeElementType(*DAG.getContext(), MVT::f32);
12904 EVT I32 = F32.changeTypeToInteger();
12905 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
12906 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12907
12908 // Conversions should set NaN's quiet bit. This also prevents NaNs from
12909 // turning into infinities.
12910 SDValue NaN =
12911 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
12912
12913 // Factor in the contribution of the low 16 bits.
12914 SDValue One = DAG.getConstant(1, dl, I32);
12915 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
12916 DAG.getShiftAmountConstant(16, I32, dl));
12917 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
12918 SDValue RoundingBias =
12919 DAG.getNode(ISD::ADD, dl, I32, Lsb, DAG.getConstant(0x7fff, dl, I32));
12920 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
12921
12922 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
12923 // 0x80000000.
12924 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
12925
12926 // Now that we have rounded, shift the bits into position.
12927 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
12928 DAG.getShiftAmountConstant(16, I32, dl));
12929 EVT I16 = I32.changeElementType(*DAG.getContext(), MVT::i16);
12930 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
12931 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
12932 }
12933 return SDValue();
12934}
12935
12937 SelectionDAG &DAG) const {
12938 assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
12939 Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
12940 "Unexpected opcode!");
12941 assert((Node->getValueType(0).isScalableVector() ||
12942 !isa<ConstantSDNode>(Node->getOperand(2))) &&
12943 "Fixed length vector types with constant offsets expected to use "
12944 "SHUFFLE_VECTOR!");
12945
12946 EVT VT = Node->getValueType(0);
12947 SDValue V1 = Node->getOperand(0);
12948 SDValue V2 = Node->getOperand(1);
12949 SDValue Offset = Node->getOperand(2);
12950 SDLoc DL(Node);
12951
12952 // Expand through memory thusly:
12953 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
12954 // Store V1, Ptr
12955 // Store V2, Ptr + sizeof(V1)
12956 // if (VECTOR_SPLICE_LEFT)
12957 // Ptr = Ptr + (Offset * sizeof(VT.Elt))
12958 // else
12959 // Ptr = Ptr + sizeof(V1) - (Offset * size(VT.Elt))
12960 // Res = Load Ptr
12961
12962 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
12963
12965 VT.getVectorElementCount() * 2);
12966 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12967 EVT PtrVT = StackPtr.getValueType();
12968 auto &MF = DAG.getMachineFunction();
12969 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12970 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12971
12972 // Store the lo part of CONCAT_VECTORS(V1, V2)
12973 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
12974 // Store the hi part of CONCAT_VECTORS(V1, V2)
12975 SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
12976 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
12977 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
12978
12979 // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
12980 SDValue EltByteSize =
12981 DAG.getTypeSize(DL, PtrVT, VT.getVectorElementType().getStoreSize());
12982 Offset = DAG.getZExtOrTrunc(Offset, DL, PtrVT);
12983 SDValue TrailingBytes = DAG.getNode(ISD::MUL, DL, PtrVT, Offset, EltByteSize);
12984
12985 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);
12986
12987 if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
12988 StackPtr = DAG.getMemBasePlusOffset(StackPtr, TrailingBytes, DL);
12989 else
12990 StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
12991
12992 // Load the spliced result
12993 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
12995}
12996
12998 SelectionDAG &DAG) const {
12999 SDLoc DL(Node);
13000 SDValue Vec = Node->getOperand(0);
13001 SDValue Mask = Node->getOperand(1);
13002 SDValue Passthru = Node->getOperand(2);
13003
13004 EVT VecVT = Vec.getValueType();
13005 EVT ScalarVT = VecVT.getScalarType();
13006 EVT MaskVT = Mask.getValueType();
13007 EVT MaskScalarVT = MaskVT.getScalarType();
13008
13009 // Needs to be handled by targets that have scalable vector types.
13010 if (VecVT.isScalableVector())
13011 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
13012
13013 SDValue StackPtr = DAG.CreateStackTemporary(
13014 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
13015 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
13016 MachinePointerInfo PtrInfo =
13018
13019 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
13020 SDValue Chain = DAG.getEntryNode();
13021 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
13022
13023 bool HasPassthru = !Passthru.isUndef();
13024
13025 // If we have a passthru vector, store it on the stack, overwrite the matching
13026 // positions and then re-write the last element that was potentially
13027 // overwritten even though mask[i] = false.
13028 if (HasPassthru)
13029 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
13030
13031 SDValue LastWriteVal;
13032 APInt PassthruSplatVal;
13033 bool IsSplatPassthru =
13034 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
13035
13036 if (IsSplatPassthru) {
13037 // As we do not know which position we wrote to last, we cannot simply
13038 // access that index from the passthru vector. So we first check if passthru
13039 // is a splat vector, to use any element ...
13040 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
13041 } else if (HasPassthru) {
13042 // ... if it is not a splat vector, we need to get the passthru value at
13043 // position = popcount(mask) and re-load it from the stack before it is
13044 // overwritten in the loop below.
13045 EVT PopcountVT = ScalarVT.changeTypeToInteger();
13046 SDValue Popcount = DAG.getNode(
13048 MaskVT.changeVectorElementType(*DAG.getContext(), MVT::i1), Mask);
13049 Popcount = DAG.getNode(
13051 MaskVT.changeVectorElementType(*DAG.getContext(), PopcountVT),
13052 Popcount);
13053 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
13054 SDValue LastElmtPtr =
13055 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
13056 LastWriteVal = DAG.getLoad(
13057 ScalarVT, DL, Chain, LastElmtPtr,
13059 Chain = LastWriteVal.getValue(1);
13060 }
13061
13062 unsigned NumElms = VecVT.getVectorNumElements();
13063 for (unsigned I = 0; I < NumElms; I++) {
13064 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
13065 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
13066 Chain = DAG.getStore(
13067 Chain, DL, ValI, OutPtr,
13069
13070 // Get the mask value and add it to the current output position. This
13071 // either increments by 1 if MaskI is true or adds 0 otherwise.
13072 // Freeze in case we have poison/undef mask entries.
13073 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
13074 MaskI = DAG.getFreeze(MaskI);
13075 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
13076 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
13077 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
13078
13079 if (HasPassthru && I == NumElms - 1) {
13080 SDValue EndOfVector =
13081 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
13082 SDValue AllLanesSelected =
13083 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
13084 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
13085 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
13086
13087 // Re-write the last ValI if all lanes were selected. Otherwise,
13088 // overwrite the last write it with the passthru value.
13089 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
13090 LastWriteVal, SDNodeFlags::Unpredictable);
13091 Chain = DAG.getStore(
13092 Chain, DL, LastWriteVal, OutPtr,
13094 }
13095 }
13096
13097 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
13098}
13099
13101 SDLoc DL(Node);
13102 EVT VT = Node->getValueType(0);
13103
13104 bool ZeroIsPoison = Node->getOpcode() == ISD::CTTZ_ELTS_ZERO_POISON;
13105 auto [Mask, StepVec] =
13106 getLegalMaskAndStepVector(Node->getOperand(0), ZeroIsPoison, DL, DAG);
13107 EVT StepVecVT = StepVec.getValueType();
13108 EVT StepVT = StepVecVT.getVectorElementType();
13109
13110 // Promote the scalar result type early to avoid redundant zexts.
13112 StepVT = getTypeToTransformTo(*DAG.getContext(), StepVT);
13113
13114 SDValue VL =
13115 DAG.getElementCount(DL, StepVT, StepVecVT.getVectorElementCount());
13116 SDValue SplatVL = DAG.getSplat(StepVecVT, DL, VL);
13117 StepVec = DAG.getNode(ISD::SUB, DL, StepVecVT, SplatVL, StepVec);
13118 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
13119 SDValue Select = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
13121 StepVecVT.getVectorElementType(), Select);
13122 SDValue Sub = DAG.getNode(ISD::SUB, DL, StepVT, VL,
13123 DAG.getZExtOrTrunc(Max, DL, StepVT));
13124
13125 return DAG.getZExtOrTrunc(Sub, DL, VT);
13126}
13127
13129 SelectionDAG &DAG) const {
13130 SDLoc DL(N);
13131 SDValue Acc = N->getOperand(0);
13132 SDValue MulLHS = N->getOperand(1);
13133 SDValue MulRHS = N->getOperand(2);
13134 EVT AccVT = Acc.getValueType();
13135 EVT MulOpVT = MulLHS.getValueType();
13136
13137 EVT ExtMulOpVT =
13139 MulOpVT.getVectorElementCount());
13140
13141 unsigned ExtOpcLHS, ExtOpcRHS;
13142 switch (N->getOpcode()) {
13143 default:
13144 llvm_unreachable("Unexpected opcode");
13146 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
13147 break;
13149 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
13150 break;
13152 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
13153 break;
13154 }
13155
13156 if (ExtMulOpVT != MulOpVT) {
13157 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
13158 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
13159 }
13160 SDValue Input = MulLHS;
13161 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
13162 if (!llvm::isOneOrOneSplatFP(MulRHS))
13163 Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
13164 } else if (!llvm::isOneOrOneSplat(MulRHS)) {
13165 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
13166 }
13167
13168 unsigned Stride = AccVT.getVectorMinNumElements();
13169 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
13170
13171 // Collect all of the subvectors
13172 std::deque<SDValue> Subvectors = {Acc};
13173 for (unsigned I = 0; I < ScaleFactor; I++)
13174 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
13175
13176 unsigned FlatNode =
13177 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
13178
13179 // Flatten the subvector tree
13180 while (Subvectors.size() > 1) {
13181 Subvectors.push_back(
13182 DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
13183 Subvectors.pop_front();
13184 Subvectors.pop_front();
13185 }
13186
13187 assert(Subvectors.size() == 1 &&
13188 "There should only be one subvector after tree flattening");
13189
13190 return Subvectors[0];
13191}
13192
13193/// Given a store node \p StoreNode, return true if it is safe to fold that node
13194/// into \p FPNode, which expands to a library call with output pointers.
13196 SDNode *FPNode) {
13198 SmallVector<const SDNode *, 8> DeferredNodes;
13200
13201 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
13202 for (SDValue Op : StoreNode->ops())
13203 if (Op.getNode() != FPNode)
13204 Worklist.push_back(Op.getNode());
13205
13207 while (!Worklist.empty()) {
13208 const SDNode *Node = Worklist.pop_back_val();
13209 auto [_, Inserted] = Visited.insert(Node);
13210 if (!Inserted)
13211 continue;
13212
13213 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
13214 return false;
13215
13216 // Reached the FPNode (would result in a cycle).
13217 // OR Reached CALLSEQ_START (would result in nested call sequences).
13218 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
13219 return false;
13220
13221 if (Node->getOpcode() == ISD::CALLSEQ_END) {
13222 // Defer looking into call sequences (so we can check we're outside one).
13223 // We still need to look through these for the predecessor check.
13224 DeferredNodes.push_back(Node);
13225 continue;
13226 }
13227
13228 for (SDValue Op : Node->ops())
13229 Worklist.push_back(Op.getNode());
13230 }
13231
13232 // True if we're outside a call sequence and don't have the FPNode as a
13233 // predecessor. No cycles or nested call sequences possible.
13234 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
13235 MaxSteps);
13236}
13237
    SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
    std::optional<unsigned> CallRetResNo) const {
  // Bail out when there is no known or supported libcall implementation.
  if (LC == RTLIB::UNKNOWN_LIBCALL)
    return false;

  RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
  if (LibcallImpl == RTLIB::Unsupported)
    return false;

  LLVMContext &Ctx = *DAG.getContext();
  EVT VT = Node->getValueType(0);
  unsigned NumResults = Node->getNumValues();

  // Find users of the node that store the results (and share input chains). The
  // destination pointers can be used instead of creating stack allocations.
  SDValue StoresInChain;
  SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
  for (SDNode *User : Node->users()) {
      continue;
    auto *ST = cast<StoreSDNode>(User);
    SDValue StoreValue = ST->getValue();
    unsigned ResNo = StoreValue.getResNo();
    // Ensure the store corresponds to an output pointer.
    // (The result returned directly by the call, if any, has no pointer.)
    if (CallRetResNo == ResNo)
      continue;
    // Ensure the store to the default address space and not atomic or volatile.
    if (!ST->isSimple() || ST->getAddressSpace() != 0)
      continue;
    // Ensure all store chains are the same (so they don't alias).
    if (StoresInChain && ST->getChain() != StoresInChain)
      continue;
    // Ensure the store is properly aligned.
    Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
    if (ST->getAlign() <
        DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
      continue;
    // Avoid:
    // 1. Creating cyclic dependencies.
    // 2. Expanding the node to a call within a call sequence.
      continue;
    ResultStores[ResNo] = ST;
    StoresInChain = ST->getChain();
  }

  ArgListTy Args;

  // Pass the arguments.
  for (const SDValue &Op : Node->op_values()) {
    EVT ArgVT = Op.getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
    Args.emplace_back(Op, ArgTy);
  }

  // Pass the output pointers. Results without a matched store get a fresh
  // stack slot instead of a user-provided destination.
  SmallVector<SDValue, 2> ResultPtrs(NumResults);
  for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
    if (ResNo == CallRetResNo)
      continue;
    EVT ResVT = Node->getValueType(ResNo);
    SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
    ResultPtrs[ResNo] = ResultPtr;
    Args.emplace_back(ResultPtr, PointerTy);
  }

  SDLoc DL(Node);

  // Pass the vector mask (if required).
  // An all-true mask is used since the whole vector is being computed.
  EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
  SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
  Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
  }

  // The call returns void unless one result value is designated to come back
  // directly (CallRetResNo).
  Type *RetType = CallRetResNo.has_value()
                      ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
                      : Type::getVoidTy(Ctx);
  // Chain the call after the folded stores' input chain when one was found.
  SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
  SDValue Callee =
      DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
  CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
      getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));

  auto [Call, CallChain] = LowerCallTo(CLI);

  // Load each result back from its output pointer (or forward the direct
  // call result), replacing any folded store with the call's chain.
  for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
    if (ResNo == CallRetResNo) {
      Results.push_back(Call);
      continue;
    }
    MachinePointerInfo PtrInfo;
    SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
                                     ResultPtr, PtrInfo);
    SDValue OutChain = LoadResult.getValue(1);

    if (StoreSDNode *ST = ResultStores[ResNo]) {
      // Replace store with the library call.
      DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
      PtrInfo = ST->getPointerInfo();
    } else {
          DAG.getMachineFunction(),
          cast<FrameIndexSDNode>(ResultPtr)->getIndex());
    }

    Results.push_back(LoadResult);
  }

  return true;
}
13353
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  // Vector-predicated (VP) legalization is requested iff both Mask and EVL
  // are provided; they must be set or unset together.
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
    // Nothing to do.
    break;
    // First try the swapped condition code with swapped operands.
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    // Special case: expand i1 comparisons using logical operations.
    if (OpVT == MVT::i1) {
      SDValue Ret;
      switch (CCCode) {
      default:
        llvm_unreachable("Unknown integer setcc!");
      case ISD::SETEQ: // X == Y --> ~(X ^ Y)
        Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
                         MVT::i1);
        break;
      case ISD::SETNE: // X != Y --> (X ^ Y)
        Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
        break;
      case ISD::SETGT:  // X >s Y --> X == 0 & Y == 1 --> ~X & Y
      case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
        Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
                          DAG.getNOT(dl, LHS, MVT::i1));
        break;
      case ISD::SETLT:  // X <s Y --> X == 1 & Y == 0 --> ~Y & X
      case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
        Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
                          DAG.getNOT(dl, RHS, MVT::i1));
        break;
      case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
      case ISD::SETGE:  // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
        Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
                          DAG.getNOT(dl, LHS, MVT::i1));
        break;
      case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
      case ISD::SETLE:  // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
        Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
                          DAG.getNOT(dl, RHS, MVT::i1));
        break;
      }

      LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
      RHS = SDValue();
      CC = SDValue();
      return true;
    }

    // General case: split into two comparisons (CC1 and CC2) combined with
    // a logical opcode (Opc).
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
                                          isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    // For chained (strict FP) setcc nodes, merge both output chains.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
13546
13548 SelectionDAG &DAG) const {
13549 EVT VT = Node->getValueType(0);
13550 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
13551 // split into two equal parts.
13552 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
13553 return SDValue();
13554
13555 // Restrict expansion to cases where both parts can be concatenated.
13556 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
13557 if (LoVT != HiVT || !isTypeLegal(LoVT))
13558 return SDValue();
13559
13560 SDLoc DL(Node);
13561 unsigned Opcode = Node->getOpcode();
13562
13563 // Don't expand if the result is likely to be unrolled anyway.
13564 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
13565 return SDValue();
13566
13567 SmallVector<SDValue, 4> LoOps, HiOps;
13568 for (const SDValue &V : Node->op_values()) {
13569 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
13570 LoOps.push_back(Lo);
13571 HiOps.push_back(Hi);
13572 }
13573
13574 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps, Node->getFlags());
13575 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps, Node->getFlags());
13576 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
13577}
13578
                                                     const SDLoc &DL,
                                                     EVT InVecVT, SDValue EltNo,
                                                     LoadSDNode *OriginalLoad,
                                                     SelectionDAG &DAG) const {
  // Only simple (non-atomic, non-volatile) loads may be narrowed.
  assert(OriginalLoad->isSimple());

  EVT VecEltVT = InVecVT.getVectorElementType();

  // If the vector element type is not a multiple of a byte then we are unable
  // to correctly compute an address to load only the extracted element as a
  // scalar.
  if (!VecEltVT.isByteSized())
    return SDValue();

  ISD::LoadExtType ExtTy =
      ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
  if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  // For a constant index the exact byte offset (and thus precise pointer
  // info and alignment) is known; a variable index only preserves the
  // element-size alignment.
  std::optional<unsigned> ByteOffset;
  Align Alignment = OriginalLoad->getAlign();
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    int Elt = ConstEltNo->getZExtValue();
    ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
    MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
    Alignment = commonAlignment(Alignment, *ByteOffset);
  } else {
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
    Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
  }

  if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
    return SDValue();

  // Also reject the transform if the target says the narrowed access would
  // be slow or unsupported at this alignment.
  unsigned IsFast = 0;
  if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
                          OriginalLoad->getAddressSpace(), Alignment,
                          OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
      !IsFast)
    return SDValue();

  // The original DAG loaded the entire vector from memory, so arithmetic
  // within it must be inbounds.
      DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);

  // We are replacing a vector load with a scalar load. The new load must have
  // identical memory op ordering to the original.
  SDValue Load;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType =
        isLoadLegal(ResultVT, VecEltVT, Alignment,
                    OriginalLoad->getAddressSpace(), ISD::ZEXTLOAD, false)
            : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
                          NewPtr, MPI, VecEltVT, Alignment,
                          OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
  } else {
    // The result type is narrower or the same width as the vector element
    Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
                       Alignment, OriginalLoad->getMemOperand()->getFlags(),
                       OriginalLoad->getAAInfo());
    DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }

  return Load;
}
13659
// Set type id for call site info and metadata 'call_target'.
// We are filtering for:
// a) The call-graph-section use case that wants to know about indirect
// calls, or
// b) We want to annotate indirect calls.
    const CallBase *CB, MachineFunction &MF,
    MachineFunction::CallSiteInfo &CSInfo) const {
  // Record call-site info only for indirect calls.
  // NOTE(review): the remaining guard conditions are presumably feature
  // gates for the call-graph-section / call-target annotation modes
  // described above -- confirm against the upstream source.
  if (CB && CB->isIndirectCall() &&
    CSInfo = MachineFunction::CallSiteInfo(*CB);
}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool isSigned(unsigned Opcode)
#define _
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
uint64_t High
#define P(N)
Function const char * Passes
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static std::pair< SDValue, SDValue > getLegalMaskAndStepVector(SDValue Mask, bool ZeroIsPoison, SDLoc DL, SelectionDAG &DAG)
Returns a type-legalized version of Mask as the first item in the pair.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static bool canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx, EVT VT, unsigned HalveDepth=0, unsigned TotalDepth=0)
Check if CLMUL on VT can eventually reach a type with legal CLMUL through a chain of halving decompos...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, SDNode *FPNode)
Given a store node StoreNode, return true if it is safe to fold that node into FPNode,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:98
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:222
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:278
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:214
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:255
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1406
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1217
APInt bitcastToAPInt() const
Definition APFloat.h:1430
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1197
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1157
void changeSign()
Definition APFloat.h:1356
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1168
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1616
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1810
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1363
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1419
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
void negate()
Negate this APInt in place.
Definition APInt.h:1491
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
unsigned countLeadingZeros() const
Definition APInt.h:1629
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:398
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1458
unsigned logBase2() const
Definition APInt.h:1784
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
void setAllBits()
Set every bit to 1.
Definition APInt.h:1342
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1317
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1390
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1440
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:483
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1465
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1366
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:865
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:217
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MCRegister getLiveInPhysReg(Register VReg) const
getLiveInPhysReg - If VReg is a live-in virtual register, return the corresponding live-in physical r...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:447
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
iterator end() const
Definition ArrayRef.h:339
iterator begin() const
Definition ArrayRef.h:338
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI bool isKnownNeverLogicalZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Test whether the given floating point SDValue (or all elements of it, if it is a vector) is known to ...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl, SDNodeFlags Flags={})
Constant fold a setcc to true or false.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isIdentityElement(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo, unsigned Depth=0) const
Returns true if V is an identity element of Opc with Flags.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, UndefPoisonKind Kind=UndefPoisonKind::UndefOrPoison, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, bool OrZero=false, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
iterator end() const
Definition StringRef.h:116
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
unsigned getBitWidthForCttzElements(EVT RetVT, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const
Should we prefer selects to doing arithmetic on boolean types.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
EVT getLegalTypeToTransformTo(LLVMContext &Context, EVT VT) const
Perform getTypeToTransformTo repeatedly until a legal type is obtained.
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall implementation.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
TargetLoweringBase(const TargetMachine &TM, const TargetSubtargetInfo &STI)
NOTE: The TargetMachine owns TLOF.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_POISON nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandFCANONICALIZE(SDNode *Node, SelectionDAG &DAG) const
Expand FCANONICALIZE to FMUL with 1.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_POISON nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_POISON nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const
Expand carryless multiply.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue expandCttzElts(SDNode *Node, SelectionDAG &DAG) const
Expand a CTTZ_ELTS or CTTZ_ELTS_ZERO_POISON by calculating (VL - i) for each active lane (i),...
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, unsigned Depth) const
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_POISON nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
SDValue expandLoopDependenceMask(SDNode *N, SelectionDAG &DAG) const
Expand LOOP_DEPENDENCE_MASK nodes.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit algorithm.
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_POISON nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandCONVERT_FROM_ARBITRARY_FP(SDNode *Node, SelectionDAG &DAG) const
Expand CONVERT_FROM_ARBITRARY_FP using bit manipulation.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual void computeKnownFPClassForTargetNode(const SDValue Op, KnownFPClass &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine floating-point class information for a target node.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, ...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
SDValue expandCTLS(SDNode *N, SelectionDAG &DAG) const
Expand CTLS (count leading sign bits) nodes.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
unsigned EmitCallSiteInfo
The flag enables call site info production.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:785
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:313
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:110
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:713
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A.
Definition APInt.cpp:3061
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ LOOP_DEPENDENCE_RAW_MASK
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:538
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:914
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:778
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:792
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ PARTIAL_REDUCE_FMLA
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ GET_ACTIVE_LANE_MASK
GET_ACTIVE_LANE_MASK - this corresponds to the llvm.get.active.lane.mask intrinsic.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:903
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:791
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:925
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:949
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ CTTZ_ELTS_ZERO_POISON
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ ABS_MIN_POISON
ABS with a poison result for INT_MIN.
Definition ISDOpcodes.h:751
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI NodeType getOppositeSignednessMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns the corresponding opcode with the opposi...
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
void stable_sort(R &&Range)
Definition STLExtras.h:2115
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1569
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
void * PointerTy
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1551
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1776
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
@ AfterLegalizeTypes
Definition DAGCombine.h:17
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
fltNonfiniteBehavior
Definition APFloat.h:952
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
UndefPoisonKind
Enumeration to track whether we are interested in Undef, Poison, or both.
Definition UndefPoison.h:20
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1666
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:479
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:438
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:486
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:420
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:195
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT widenIntegerElementType(LLVMContext &Context) const
Return a VT for an integer element type with doubled bit width.
Definition ValueTypes.h:452
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:316
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:315
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:190
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:269
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:97
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:165
KnownBits byteSwap() const
Definition KnownBits.h:553
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
KnownBits reverseBits() const
Definition KnownBits.h:557
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:247
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:176
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:335
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:67
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:325
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:184
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:61
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything about the top bits.
Definition KnownBits.h:171
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequence of multiplies, adds and shifts.
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a sequence of multiplies, adds and shifts.
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1013
fltNanEncoding nanEncoding
Definition APFloat.h:1015