LLVM 23.0.0git
InstCombineCalls.cpp
Go to the documentation of this file.
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/Bitset.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/Analysis/Loads.h"
33#include "llvm/IR/Attributes.h"
34#include "llvm/IR/BasicBlock.h"
36#include "llvm/IR/Constant.h"
37#include "llvm/IR/Constants.h"
38#include "llvm/IR/DataLayout.h"
39#include "llvm/IR/DebugInfo.h"
41#include "llvm/IR/Function.h"
43#include "llvm/IR/InlineAsm.h"
44#include "llvm/IR/InstrTypes.h"
45#include "llvm/IR/Instruction.h"
48#include "llvm/IR/Intrinsics.h"
49#include "llvm/IR/IntrinsicsAArch64.h"
50#include "llvm/IR/IntrinsicsAMDGPU.h"
51#include "llvm/IR/IntrinsicsARM.h"
52#include "llvm/IR/IntrinsicsHexagon.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Statepoint.h"
58#include "llvm/IR/Type.h"
59#include "llvm/IR/User.h"
60#include "llvm/IR/Value.h"
61#include "llvm/IR/ValueHandle.h"
66#include "llvm/Support/Debug.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82#include <vector>
83
84#define DEBUG_TYPE "instcombine"
86
87using namespace llvm;
88using namespace PatternMatch;
89
90STATISTIC(NumSimplified, "Number of library calls simplified");
91
93 "instcombine-guard-widening-window",
94 cl::init(3),
95 cl::desc("How wide an instruction window to bypass looking for "
96 "another guard"));
97
98/// Return the specified type promoted as it would be to pass through a va_arg
99/// area.
101 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
102 if (ITy->getBitWidth() < 32)
103 return Type::getInt32Ty(Ty->getContext());
104 }
105 return Ty;
106}
107
108/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
109/// TODO: This should probably be integrated with visitAllocSites, but that
110/// requires a deeper change to allow either unread or unwritten objects.
112 auto *Src = MI->getRawSource();
113 while (isa<GetElementPtrInst>(Src)) {
114 if (!Src->hasOneUse())
115 return false;
116 Src = cast<Instruction>(Src)->getOperand(0);
117 }
118 return isa<AllocaInst>(Src) && Src->hasOneUse();
119}
120
122 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
123 MaybeAlign CopyDstAlign = MI->getDestAlign();
124 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
125 MI->setDestAlignment(DstAlign);
126 return MI;
127 }
128
129 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
130 MaybeAlign CopySrcAlign = MI->getSourceAlign();
131 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
132 MI->setSourceAlignment(SrcAlign);
133 return MI;
134 }
135
136 // If we have a store to a location which is known constant, we can conclude
137 // that the store must be storing the constant value (else the memory
138 // wouldn't be constant), and this must be a noop.
139 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
140 // Set the size of the copy to 0, it will be deleted on the next iteration.
141 MI->setLength((uint64_t)0);
142 return MI;
143 }
144
145 // If the source is provably undef, the memcpy/memmove doesn't do anything
146 // (unless the transfer is volatile).
147 if (hasUndefSource(MI) && !MI->isVolatile()) {
148 // Set the size of the copy to 0, it will be deleted on the next iteration.
149 MI->setLength((uint64_t)0);
150 return MI;
151 }
152
153 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
154 // load/store.
155 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
156 if (!MemOpLength) return nullptr;
157
158 // Source and destination pointer types are always "i8*" for intrinsic. See
159 // if the size is something we can handle with a single primitive load/store.
160 // A single load+store correctly handles overlapping memory in the memmove
161 // case.
162 uint64_t Size = MemOpLength->getLimitedValue();
163 assert(Size && "0-sized memory transferring should be removed already.");
164
165 if (Size > 8 || (Size&(Size-1)))
166 return nullptr; // If not 1/2/4/8 bytes, exit.
167
168 // If it is an atomic and alignment is less than the size then we will
169 // introduce the unaligned memory access which will be later transformed
170 // into libcall in CodeGen. This is not evident performance gain so disable
171 // it now.
172 if (MI->isAtomic())
173 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
174 return nullptr;
175
176 // Use an integer load+store unless we can find something better.
177 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
178
179 // If the memcpy has metadata describing the members, see if we can get the
180 // TBAA, scope and noalias tags describing our copy.
181 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
182
183 Value *Src = MI->getArgOperand(1);
184 Value *Dest = MI->getArgOperand(0);
185 LoadInst *L = Builder.CreateLoad(IntType, Src);
186 // Alignment from the mem intrinsic will be better, so use it.
187 L->setAlignment(*CopySrcAlign);
188 L->setAAMetadata(AACopyMD);
189 MDNode *LoopMemParallelMD =
190 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
191 if (LoopMemParallelMD)
192 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
193 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
194 if (AccessGroupMD)
195 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
196
197 StoreInst *S = Builder.CreateStore(L, Dest);
198 // Alignment from the mem intrinsic will be better, so use it.
199 S->setAlignment(*CopyDstAlign);
200 S->setAAMetadata(AACopyMD);
201 if (LoopMemParallelMD)
202 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
203 if (AccessGroupMD)
204 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
205 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
206
207 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
208 // non-atomics can be volatile
209 L->setVolatile(MT->isVolatile());
210 S->setVolatile(MT->isVolatile());
211 }
212 if (MI->isAtomic()) {
213 // atomics have to be unordered
214 L->setOrdering(AtomicOrdering::Unordered);
216 }
217
218 // Set the size of the copy to 0, it will be deleted on the next iteration.
219 MI->setLength((uint64_t)0);
220 return MI;
221}
222
224 const Align KnownAlignment =
225 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
226 MaybeAlign MemSetAlign = MI->getDestAlign();
227 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
228 MI->setDestAlignment(KnownAlignment);
229 return MI;
230 }
231
232 // If we have a store to a location which is known constant, we can conclude
233 // that the store must be storing the constant value (else the memory
234 // wouldn't be constant), and this must be a noop.
235 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
236 // Set the size of the copy to 0, it will be deleted on the next iteration.
237 MI->setLength((uint64_t)0);
238 return MI;
239 }
240
241 // Remove memset with an undef value.
242 // FIXME: This is technically incorrect because it might overwrite a poison
243 // value. Change to PoisonValue once #52930 is resolved.
244 if (isa<UndefValue>(MI->getValue())) {
245 // Set the size of the copy to 0, it will be deleted on the next iteration.
246 MI->setLength((uint64_t)0);
247 return MI;
248 }
249
250 // Extract the length and alignment and fill if they are constant.
251 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
252 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
253 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
254 return nullptr;
255 const uint64_t Len = LenC->getLimitedValue();
256 assert(Len && "0-sized memory setting should be removed already.");
257 const Align Alignment = MI->getDestAlign().valueOrOne();
258
259 // If it is an atomic and alignment is less than the size then we will
260 // introduce the unaligned memory access which will be later transformed
261 // into libcall in CodeGen. This is not evident performance gain so disable
262 // it now.
263 if (MI->isAtomic() && Alignment < Len)
264 return nullptr;
265
266 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
267 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
268 Value *Dest = MI->getDest();
269
270 // Extract the fill value and store.
271 Constant *FillVal = ConstantInt::get(
272 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
273 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
274 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
275 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
276 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
277 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
278 }
279
280 S->setAlignment(Alignment);
281 if (MI->isAtomic())
283
284 // Set the size of the copy to 0, it will be deleted on the next iteration.
285 MI->setLength((uint64_t)0);
286 return MI;
287 }
288
289 return nullptr;
290}
291
292// TODO, Obvious Missing Transforms:
293// * Narrow width by halfs excluding zero/undef lanes
294Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
295 Value *LoadPtr = II.getArgOperand(0);
296 const Align Alignment = II.getParamAlign(0).valueOrOne();
297 Value *Mask = II.getArgOperand(1);
298
299 // If the mask is all ones or poison, this is a plain vector load of the 1st
300 // argument.
301 if (match(Mask, m_AllOnesOrPoison())) {
302 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
303 "unmaskedload");
304 L->copyMetadata(II);
305 return L;
306 }
307
308 // If we can unconditionally load from this address, replace with a
309 // load/select idiom.
310 if (isDereferenceablePointer(LoadPtr, II.getType(),
312 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
313 "unmaskedload");
314 LI->copyMetadata(II);
315 return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
316 }
317
318 return nullptr;
319}
320
321// TODO, Obvious Missing Transforms:
322// * Single constant active lane -> store
323// * Narrow width by halfs excluding zero/undef lanes
324Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
325 Value *StorePtr = II.getArgOperand(1);
326 Align Alignment = II.getParamAlign(1).valueOrOne();
327 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
328 if (!ConstMask)
329 return nullptr;
330
331 // If the mask is all zeros or poison, this instruction does nothing.
332 if (match(ConstMask, m_ZeroOrPoison()))
334
335 // If the mask is all ones or poison, this is a plain vector store of the 1st
336 // argument.
337 if (match(ConstMask, m_AllOnesOrPoison())) {
338 StoreInst *S =
339 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
340 S->copyMetadata(II);
341 return S;
342 }
343
344 if (isa<ScalableVectorType>(ConstMask->getType()))
345 return nullptr;
346
347 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
348 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
349 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
350 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
351 PoisonElts))
352 return replaceOperand(II, 0, V);
353
354 return nullptr;
355}
356
357// TODO, Obvious Missing Transforms:
358// * Single constant active lane load -> load
359// * Dereferenceable address & few lanes -> scalarize speculative load/selects
360// * Adjacent vector addresses -> masked.load
361// * Narrow width by halfs excluding zero/undef lanes
362// * Vector incrementing address -> vector masked load
363Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
364 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
365 if (!ConstMask)
366 return nullptr;
367
368 // Vector splat address w/known mask -> scalar load
369 // Fold the gather to load the source vector first lane
370 // because it is reloading the same value each time
371 if (ConstMask->isAllOnesValue())
372 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
373 auto *VecTy = cast<VectorType>(II.getType());
374 const Align Alignment = II.getParamAlign(0).valueOrOne();
375 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
376 Alignment, "load.scalar");
377 Value *Shuf =
378 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
380 }
381
382 return nullptr;
383}
384
385// TODO, Obvious Missing Transforms:
386// * Single constant active lane -> store
387// * Adjacent vector addresses -> masked.store
388// * Narrow store width by halfs excluding zero/undef lanes
389// * Vector incrementing address -> vector masked store
390Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
391 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
392 if (!ConstMask)
393 return nullptr;
394
395 // If the mask is all zeros or poison, a scatter does nothing.
396 if (match(ConstMask, m_ZeroOrPoison()))
398
399 // Vector splat address -> scalar store
400 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
401 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
402 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
403 if (maskContainsAllOneOrUndef(ConstMask)) {
404 Align Alignment = II.getParamAlign(1).valueOrOne();
405 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
406 Alignment);
407 S->copyMetadata(II);
408 return S;
409 }
410 }
411 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
412 // lastlane), ptr
413 if (ConstMask->isAllOnesValue()) {
414 Align Alignment = II.getParamAlign(1).valueOrOne();
415 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
416 ElementCount VF = WideLoadTy->getElementCount();
417 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
418 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
419 Value *Extract =
420 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
421 StoreInst *S =
422 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
423 S->copyMetadata(II);
424 return S;
425 }
426 }
427 if (isa<ScalableVectorType>(ConstMask->getType()))
428 return nullptr;
429
430 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
431 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
432 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
433 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
434 PoisonElts))
435 return replaceOperand(II, 0, V);
436 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
437 PoisonElts))
438 return replaceOperand(II, 1, V);
439
440 return nullptr;
441}
442
443/// This function transforms launder.invariant.group and strip.invariant.group
444/// like:
445/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
446/// launder(strip(%x)) -> launder(%x)
447/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
448/// strip(launder(%x)) -> strip(%x)
449/// This is legal because it preserves the most recent information about
450/// the presence or absence of invariant.group.
452 InstCombinerImpl &IC) {
453 auto *Arg = II.getArgOperand(0);
454 auto *StrippedArg = Arg->stripPointerCasts();
455 auto *StrippedInvariantGroupsArg = StrippedArg;
456 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
457 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
458 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
459 break;
460 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
461 }
462 if (StrippedArg == StrippedInvariantGroupsArg)
463 return nullptr; // No launders/strips to remove.
464
465 Value *Result = nullptr;
466
467 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
468 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
469 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
470 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
471 else
473 "simplifyInvariantGroupIntrinsic only handles launder and strip");
474 if (Result->getType()->getPointerAddressSpace() !=
475 II.getType()->getPointerAddressSpace())
476 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
477
478 return cast<Instruction>(Result);
479}
480
482 assert((II.getIntrinsicID() == Intrinsic::cttz ||
483 II.getIntrinsicID() == Intrinsic::ctlz) &&
484 "Expected cttz or ctlz intrinsic");
485 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
486 Value *Op0 = II.getArgOperand(0);
487 Value *Op1 = II.getArgOperand(1);
488 Value *X;
489 // ctlz(bitreverse(x)) -> cttz(x)
490 // cttz(bitreverse(x)) -> ctlz(x)
491 if (match(Op0, m_BitReverse(m_Value(X)))) {
492 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
493 Function *F =
494 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
495 return CallInst::Create(F, {X, II.getArgOperand(1)});
496 }
497
498 if (II.getType()->isIntOrIntVectorTy(1)) {
499 // ctlz/cttz i1 Op0 --> not Op0
500 if (match(Op1, m_Zero()))
501 return BinaryOperator::CreateNot(Op0);
502 // If zero is poison, then the input can be assumed to be "true", so the
503 // instruction simplifies to "false".
504 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
505 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
506 }
507
508 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
509 if (II.hasOneUse() && match(Op1, m_Zero()) &&
510 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II))))
511 return CallInst::Create(II.getCalledFunction(),
512 {Op0, IC.Builder.getTrue()});
513
514 Constant *C;
515
516 if (IsTZ) {
517 // cttz(-x) -> cttz(x)
518 if (match(Op0, m_Neg(m_Value(X))))
519 return CallInst::Create(II.getCalledFunction(), {X, Op1});
520
521 // cttz(-x & x) -> cttz(x)
522 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
523 return CallInst::Create(II.getCalledFunction(), {X, Op1});
524
525 // cttz(sext(x)) -> cttz(zext(x))
526 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
527 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
528 auto *CttzZext =
529 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
530 return IC.replaceInstUsesWith(II, CttzZext);
531 }
532
533 // Zext doesn't change the number of trailing zeros, so narrow:
534 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
535 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
536 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
537 IC.Builder.getTrue());
538 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
539 return IC.replaceInstUsesWith(II, ZextCttz);
540 }
541
542 // cttz(abs(x)) -> cttz(x)
543 // cttz(nabs(x)) -> cttz(x)
544 Value *Y;
546 if (SPF == SPF_ABS || SPF == SPF_NABS)
547 return CallInst::Create(II.getCalledFunction(), {X, Op1});
548
550 return CallInst::Create(II.getCalledFunction(), {X, Op1});
551
552 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
553 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
554 match(Op1, m_One())) {
555 Value *ConstCttz =
556 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
557 return BinaryOperator::CreateAdd(ConstCttz, X);
558 }
559
560 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
561 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
562 match(Op1, m_One())) {
563 Value *ConstCttz =
564 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
565 return BinaryOperator::CreateSub(ConstCttz, X);
566 }
567
568 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
569 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
570 Value *Width =
571 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
572 return BinaryOperator::CreateSub(Width, X);
573 }
574 } else {
575 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
576 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
577 match(Op1, m_One())) {
578 Value *ConstCtlz =
579 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
580 return BinaryOperator::CreateAdd(ConstCtlz, X);
581 }
582
583 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
584 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
585 match(Op1, m_One())) {
586 Value *ConstCtlz =
587 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
588 return BinaryOperator::CreateSub(ConstCtlz, X);
589 }
590
591 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
592 if (Op0->hasOneUse() &&
593 match(Op0,
595 Type *Ty = II.getType();
596 unsigned BitWidth = Ty->getScalarSizeInBits();
597 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
598 {X, IC.Builder.getFalse()});
599 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
600 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
601 }
602 }
603
604 // cttz(Pow2) -> Log2(Pow2)
605 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
606 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
607 if (IsTZ)
608 return IC.replaceInstUsesWith(II, R);
609 BinaryOperator *BO = BinaryOperator::CreateSub(
610 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
611 R);
612 BO->setHasNoSignedWrap();
614 return BO;
615 }
616
617 KnownBits Known = IC.computeKnownBits(Op0, &II);
618
619 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
620 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
621 : Known.countMaxLeadingZeros();
622 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
623 : Known.countMinLeadingZeros();
624
625 // If all bits above (ctlz) or below (cttz) the first known one are known
626 // zero, this value is constant.
627 // FIXME: This should be in InstSimplify because we're replacing an
628 // instruction with a constant.
629 if (PossibleZeros == DefiniteZeros) {
630 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
631 return IC.replaceInstUsesWith(II, C);
632 }
633
634 // If the input to cttz/ctlz is known to be non-zero,
635 // then change the 'ZeroIsPoison' parameter to 'true'
636 // because we know the zero behavior can't affect the result.
637 if (!Known.One.isZero() ||
639 if (!match(II.getArgOperand(1), m_One()))
640 return CallInst::Create(II.getCalledFunction(),
641 {Op0, IC.Builder.getTrue()});
642 }
643
644 // Add range attribute since known bits can't completely reflect what we know.
645 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
646 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
647 !II.getMetadata(LLVMContext::MD_range)) {
648 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
649 APInt(BitWidth, PossibleZeros + 1));
650 II.addRangeRetAttr(Range);
651 return &II;
652 }
653
654 return nullptr;
655}
656
658 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
659 "Expected ctpop intrinsic");
660 Type *Ty = II.getType();
661 unsigned BitWidth = Ty->getScalarSizeInBits();
662 Value *Op0 = II.getArgOperand(0);
663 Value *X, *Y;
664
665 // ctpop(bitreverse(x)) -> ctpop(x)
666 // ctpop(bswap(x)) -> ctpop(x)
667 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
668 return CallInst::Create(II.getCalledFunction(), X);
669
670 // ctpop(rot(x)) -> ctpop(x)
671 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
672 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
673 X == Y)
674 return CallInst::Create(II.getCalledFunction(), X);
675
676 // ctpop(x | -x) -> bitwidth - cttz(x, false)
677 if (Op0->hasOneUse() &&
678 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
679 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
680 {X, IC.Builder.getFalse()});
681 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
682 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
683 }
684
685 // ctpop(~x & (x - 1)) -> cttz(x, false)
686 if (match(Op0,
688 Function *F =
689 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
690 return CallInst::Create(F, {X, IC.Builder.getFalse()});
691 }
692
693 // Zext doesn't change the number of set bits, so narrow:
694 // ctpop (zext X) --> zext (ctpop X)
695 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
696 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
697 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
698 }
699
700 KnownBits Known(BitWidth);
701 IC.computeKnownBits(Op0, Known, &II);
702
703 // If all bits are zero except for exactly one fixed bit, then the result
704 // must be 0 or 1, and we can get that answer by shifting to LSB:
705 // ctpop (X & 32) --> (X & 32) >> 5
706 // TODO: Investigate removing this as it's likely unnecessary given the below
707 // `isKnownToBeAPowerOfTwo` check.
708 if ((~Known.Zero).isPowerOf2())
709 return BinaryOperator::CreateLShr(
710 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
711
712 // More generally we can also handle non-constant power of 2 patterns such as
713 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
714 // ctpop(Pow2OrZero) --> icmp ne X, 0
715 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
716 return CastInst::Create(Instruction::ZExt,
719 Ty);
720
721 // Add range attribute since known bits can't completely reflect what we know.
722 if (BitWidth != 1) {
723 ConstantRange OldRange =
724 II.getRange().value_or(ConstantRange::getFull(BitWidth));
725
726 unsigned Lower = Known.countMinPopulation();
727 unsigned Upper = Known.countMaxPopulation() + 1;
728
729 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
731 Lower = 1;
732
734 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
735
736 if (Range != OldRange) {
737 II.addRangeRetAttr(Range);
738 return &II;
739 }
740 }
741
742 return nullptr;
743}
744
745/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
746/// at most two source operands are actually referenced.
748 bool IsExtension) {
749 // Bail out if the mask is not a constant.
750 auto *C = dyn_cast<Constant>(II.getArgOperand(II.arg_size() - 1));
751 if (!C)
752 return nullptr;
753
754 auto *RetTy = cast<FixedVectorType>(II.getType());
755 unsigned NumIndexes = RetTy->getNumElements();
756
757 // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
758 if (!RetTy->getElementType()->isIntegerTy(8) ||
759 (NumIndexes != 8 && NumIndexes != 16))
760 return nullptr;
761
762 // For tbx instructions, the first argument is the "fallback" vector, which
763 // has the same length as the mask and return type.
764 unsigned int StartIndex = (unsigned)IsExtension;
765 auto *SourceTy =
766 cast<FixedVectorType>(II.getArgOperand(StartIndex)->getType());
767 // Note that the element count of each source vector does *not* need to be the
768 // same as the element count of the return type and mask! All source vectors
769 // must have the same element count as each other, though.
770 unsigned NumElementsPerSource = SourceTy->getNumElements();
771
772 // There are no tbl/tbx intrinsics for which the destination size exceeds the
773 // source size. However, our definitions of the intrinsics, at least in
774 // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
775 // *could* technically happen.
776 if (NumIndexes > NumElementsPerSource)
777 return nullptr;
778
779 // The tbl/tbx intrinsics take several source operands followed by a mask
780 // operand.
781 unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
782
783 // Map input operands to shuffle indices. This also helpfully deduplicates the
784 // input arguments, in case the same value is passed as an argument multiple
785 // times.
786 SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
787 Value *ShuffleOperands[2] = {PoisonValue::get(SourceTy),
788 PoisonValue::get(SourceTy)};
789
790 int Indexes[16];
791 for (unsigned I = 0; I < NumIndexes; ++I) {
792 Constant *COp = C->getAggregateElement(I);
793
794 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
795 return nullptr;
796
797 if (isa<UndefValue>(COp)) {
798 Indexes[I] = -1;
799 continue;
800 }
801
802 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
803 // The index of the input argument that this index references (0 = first
804 // source argument, etc).
805 unsigned SourceOperandIndex = Index / NumElementsPerSource;
806 // The index of the element at that source operand.
807 unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
808
809 Value *SourceOperand;
810 if (SourceOperandIndex >= NumSourceOperands) {
811 // This index is out of bounds. Map it to index into either the fallback
812 // vector (tbx) or vector of zeroes (tbl).
813 SourceOperandIndex = NumSourceOperands;
814 if (IsExtension) {
815 // For out-of-bounds indices in tbx, choose the `I`th element of the
816 // fallback.
817 SourceOperand = II.getArgOperand(0);
818 SourceOperandElementIndex = I;
819 } else {
820 // Otherwise, choose some element from the dummy vector of zeroes (we'll
821 // always choose the first).
822 SourceOperand = Constant::getNullValue(SourceTy);
823 SourceOperandElementIndex = 0;
824 }
825 } else {
826 SourceOperand = II.getArgOperand(SourceOperandIndex + StartIndex);
827 }
828
829 // The source operand may be the fallback vector, which may not have the
830 // same number of elements as the source vector. In that case, we *could*
831 // choose to extend its length with another shufflevector, but it's simpler
832 // to just bail instead.
833 if (cast<FixedVectorType>(SourceOperand->getType())->getNumElements() !=
834 NumElementsPerSource)
835 return nullptr;
836
837 // We now know the source operand referenced by this index. Make it a
838 // shufflevector operand, if it isn't already.
839 unsigned NumSlots = ValueToShuffleSlot.size();
840 // This shuffle references more than two sources, and hence cannot be
841 // represented as a shufflevector.
842 if (NumSlots == 2 && !ValueToShuffleSlot.contains(SourceOperand))
843 return nullptr;
844
845 auto [It, Inserted] =
846 ValueToShuffleSlot.try_emplace(SourceOperand, NumSlots);
847 if (Inserted)
848 ShuffleOperands[It->getSecond()] = SourceOperand;
849
850 unsigned RemappedIndex =
851 (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
852 Indexes[I] = RemappedIndex;
853 }
854
856 ShuffleOperands[0], ShuffleOperands[1], ArrayRef(Indexes, NumIndexes));
857 return IC.replaceInstUsesWith(II, Shuf);
858}
859
860// Returns true iff the 2 intrinsics have the same operands, limiting the
861// comparison to the first NumOperands.
862static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
863 unsigned NumOperands) {
864 assert(I.arg_size() >= NumOperands && "Not enough operands");
865 assert(E.arg_size() >= NumOperands && "Not enough operands");
866 for (unsigned i = 0; i < NumOperands; i++)
867 if (I.getArgOperand(i) != E.getArgOperand(i))
868 return false;
869 return true;
870}
871
// NOTE(review): the embedded listing numbers skip 882 and 895 inside this
// definition, so the line carrying the function name and leading parameters
// and one statement in the matching-start branch appear to have been lost in
// extraction — restore them from upstream before building.
872 // Remove trivially empty start/end intrinsic ranges, i.e. a start
873 // immediately followed by an end (ignoring debuginfo or other
874 // start/end intrinsics in between). As this handles only the most trivial
875 // cases, tracking the nesting level is not needed:
876 //
877 // call @llvm.foo.start(i1 0)
878 // call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
879 // call @llvm.foo.end(i1 0)
880 // call @llvm.foo.end(i1 0) ; &I
//
// IsStart decides whether a scanned intrinsic counts as a start. Returns true
// after EndI has been erased, and false when the backward scan ends without
// reaching a start whose operands all match EndI's.
881 static bool
883 std::function<bool(const IntrinsicInst &)> IsStart) {
884 // We start from the end intrinsic and scan backwards, so that InstCombine
885 // has already processed (and potentially removed) all the instructions
886 // before the end intrinsic.
887 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
888 for (; BI != BE; ++BI) {
889 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
// Debug/pseudo instructions and further end intrinsics of the same kind are
// stepped over.
890 if (I->isDebugOrPseudoInst() ||
891 I->getIntrinsicID() == EndI.getIntrinsicID())
892 continue;
893 if (IsStart(*I)) {
894 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
// NOTE(review): listing line 895 is missing here.
896 IC.eraseInstFromFunction(EndI);
897 return true;
898 }
899 // Skip start intrinsics that don't pair with this end intrinsic.
900 continue;
901 }
902 }
// Anything else (a non-intrinsic, or an unrelated intrinsic) ends the scan.
903 break;
904 }
905
906 return false;
907 }
908
// NOTE(review): embedded listing number 909 is absent, so the signature line
// of this definition appears to have been lost in extraction.
//
// Attempts to drop a trivially empty range that ends at I. The result of
// removeTriviallyEmptyRange is not inspected; this body always returns nullptr.
910 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
911 // Bail out on the case where the source va_list of a va_copy is destroyed
912 // immediately by a follow-up va_end.
// A vastart always counts as a start; a vacopy counts only when its operand 1
// is not the value passed as operand 0 of I.
913 return II.getIntrinsicID() == Intrinsic::vastart ||
914 (II.getIntrinsicID() == Intrinsic::vacopy &&
915 I.getArgOperand(0) != II.getArgOperand(1));
916 });
917 return nullptr;
918 }
919
// NOTE(review): embedded listing number 920 is absent, so the signature line
// of this definition appears to have been lost in extraction.
//
// When argument 0 of Call is a Constant and argument 1 is not, swaps the two
// operands and also swaps their parameter attribute sets so each attribute set
// stays with its value. Returns &Call if the swap happened, nullptr otherwise.
921 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
922 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
923 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
924 Call.setArgOperand(0, Arg1);
925 Call.setArgOperand(1, Arg0);
// Capture both parameter attribute sets before rewriting either slot.
926 AttributeList CallAttr = Call.getAttributes();
927 AttributeSet LHSAttr = CallAttr.getParamAttrs(0);
928 AttributeSet RHSAttr = CallAttr.getParamAttrs(1);
929 LLVMContext &Ctx = Call.getContext();
930 Call.setAttributes(CallAttr
931 .setAttributesAtIndex(
932 Ctx, AttributeList::FirstArgIndex + 0, RHSAttr)
933 .setAttributesAtIndex(
934 Ctx, AttributeList::FirstArgIndex + 1, LHSAttr));
935 return &Call;
936 }
937 return nullptr;
938 }
939
940 /// Creates a result tuple for an overflow intrinsic \p II with a given
941 /// \p Result and a constant \p Overflow value.
///
/// The tuple is built as a constant struct {poison, Overflow} of \p II's
/// struct type, with \p Result then inserted at index 0 by the returned
/// insertvalue instruction.
// NOTE(review): embedded listing number 942 is absent, so the line carrying the
// return type, function name and leading parameters appears to have been lost
// in extraction.
943 Constant *Overflow) {
944 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
945 StructType *ST = cast<StructType>(II->getType());
946 Constant *Struct = ConstantStruct::get(ST, V);
947 return InsertValueInst::Create(Struct, Result, 0);
948 }
949
// NOTE(review): embedded listing number 950 is absent, so the line preceding
// the qualified function name (presumably the return type) appears to have
// been lost in extraction.
//
// Folds a with.overflow intrinsic in two ways: first via
// OptimizeOverflowCheck, then by looking for an assumption that the overflow
// bit (extractvalue index 1) is false. Returns the replacement tuple
// instruction or nullptr.
951InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
952 WithOverflowInst *WO = cast<WithOverflowInst>(II);
953 Value *OperationResult = nullptr;
954 Constant *OverflowResult = nullptr;
955 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
956 WO->getRHS(), *WO, OperationResult, OverflowResult))
957 return createOverflowTuple(WO, OperationResult, OverflowResult);
958
959 // See whether we can optimize the overflow check with assumption information.
960 for (User *U : WO->users()) {
// Only users that extract element 1 of the result are of interest.
961 if (!match(U, m_ExtractValue<1>(m_Value())))
962 continue;
963
964 for (auto &AssumeVH : AC.assumptionsFor(U)) {
965 if (!AssumeVH)
966 continue;
967 CallInst *I = cast<CallInst>(AssumeVH);
// The assumption must be exactly "not U".
968 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
969 continue;
970 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
971 /*AllowEphemerals=*/true))
972 continue;
// Emit the plain binary operation, tagged nsw or nuw according to the
// signedness of the intrinsic, and pair it with a constant-false overflow bit.
973 Value *Result =
974 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
975 Result->takeName(WO);
976 if (auto *Inst = dyn_cast<Instruction>(Result)) {
977 if (WO->isSigned())
978 Inst->setHasNoSignedWrap();
979 else
980 Inst->setHasNoUnsignedWrap();
981 }
982 return createOverflowTuple(WO, Result,
983 ConstantInt::getFalse(U->getType()));
984 }
985 }
986
987 return nullptr;
988 }
989
990static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
991 Ty = Ty->getScalarType();
992 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
993}
994
995static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
996 Ty = Ty->getScalarType();
997 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
998}
999
// NOTE(review): the embedded listing numbers skip 1003, 1018, 1022, 1030, 1034
// and 1058 inside this definition. The line carrying the return type, function
// name and Mask parameter, four case labels, and the statement after the
// switch therefore appear to have been lost in extraction. Each gap is marked
// below; restore them from upstream before building.
1000 /// \returns the compare predicate type if the test performed by
1001 /// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
1002 /// floating-point environment assumed for \p F for type \p Ty
///
/// Each visible case pairs a class mask with a denormal-input requirement:
/// masks that leave subnormals out of the zero class require IEEE input
/// handling, while masks that group subnormals with zero require
/// inputs-are-zero handling.
1004 const Function &F, Type *Ty) {
1005 switch (static_cast<unsigned>(Mask)) {
1006 case fcZero:
1007 if (inputDenormalIsIEEE(F, Ty))
1008 return FCmpInst::FCMP_OEQ;
1009 break;
1010 case fcZero | fcSubnormal:
1011 if (inputDenormalIsDAZ(F, Ty))
1012 return FCmpInst::FCMP_OEQ;
1013 break;
1014 case fcPositive | fcNegZero:
1015 if (inputDenormalIsIEEE(F, Ty))
1016 return FCmpInst::FCMP_OGE;
1017 break;
// NOTE(review): listing line 1018 (a case label) is missing here.
1019 if (inputDenormalIsDAZ(F, Ty))
1020 return FCmpInst::FCMP_OGE;
1021 break;
// NOTE(review): listing line 1022 (a case label) is missing here.
1023 if (inputDenormalIsIEEE(F, Ty))
1024 return FCmpInst::FCMP_OGT;
1025 break;
1026 case fcNegative | fcPosZero:
1027 if (inputDenormalIsIEEE(F, Ty))
1028 return FCmpInst::FCMP_OLE;
1029 break;
// NOTE(review): listing line 1030 (a case label) is missing here.
1031 if (inputDenormalIsDAZ(F, Ty))
1032 return FCmpInst::FCMP_OLE;
1033 break;
// NOTE(review): listing line 1034 (a case label) is missing here.
1035 if (inputDenormalIsIEEE(F, Ty))
1036 return FCmpInst::FCMP_OLT;
1037 break;
1038 case fcPosNormal | fcPosInf:
1039 if (inputDenormalIsDAZ(F, Ty))
1040 return FCmpInst::FCMP_OGT;
1041 break;
1042 case fcNegNormal | fcNegInf:
1043 if (inputDenormalIsDAZ(F, Ty))
1044 return FCmpInst::FCMP_OLT;
1045 break;
1046 case ~fcZero & ~fcNan:
1047 if (inputDenormalIsIEEE(F, Ty))
1048 return FCmpInst::FCMP_ONE;
1049 break;
1050 case ~(fcZero | fcSubnormal) & ~fcNan:
1051 if (inputDenormalIsDAZ(F, Ty))
1052 return FCmpInst::FCMP_ONE;
1053 break;
1054 default:
1055 break;
1056 }
1057
// NOTE(review): listing line 1058 (the fall-through return for "no equivalent
// compare") is missing here.
1059 }
1060
// Simplifies a call to llvm.is.fpclass. In order, it:
//   1. pushes an fneg or fabs on the tested value into the class mask;
//   2. outside strictfp functions, rewrites infinity, NaN and zero-relative
//      masks as an fcmp;
//   3. prunes mask bits that computeKnownFPClass rules out, or folds the call
//      to true when the mask equals the known-possible classes.
// Returns the replacement/modified instruction, or nullptr when nothing fires.
//
// NOTE(review): the embedded listing numbers skip 1093, 1126, 1151, 1169 and
// 1170 inside this definition, so the declarations of Inf (twice), PredType
// and Zero plus the tail of one condition appear to have been lost in
// extraction. Each gap is marked below; restore them from upstream before
// building.
1061 Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1062 Value *Src0 = II.getArgOperand(0);
1063 Value *Src1 = II.getArgOperand(1);
1064 const ConstantInt *CMask = cast<ConstantInt>(Src1);
1065 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
// IsUnordered: every NaN bit is set. IsOrdered: no NaN bit is set. A mask
// with only one of the NaN bits satisfies neither.
1066 const bool IsUnordered = (Mask & fcNan) == fcNan;
1067 const bool IsOrdered = (Mask & fcNan) == fcNone;
1068 const FPClassTest OrderedMask = Mask & ~fcNan;
1069 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1070
1071 const bool IsStrict =
1072 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
1073
1074 Value *FNegSrc;
1075 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1076 if (match(Src0, m_FNeg(m_Value(FNegSrc))))
1077 return CallInst::Create(
1078 II.getCalledFunction(),
1079 {FNegSrc, ConstantInt::get(Src1->getType(), fneg(Mask))});
1080
1081 Value *FAbsSrc;
// is.fpclass (fabs x), mask -> is.fpclass x, (inverse_fabs mask)
1082 if (match(Src0, m_FAbs(m_Value(FAbsSrc))))
1083 return CallInst::Create(
1084 II.getCalledFunction(),
1085 {FAbsSrc, ConstantInt::get(Src1->getType(), inverse_fabs(Mask))});
1086
1087 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
1088 (IsOrdered || IsUnordered) && !IsStrict) {
1089 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1090 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1091 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1092 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
// NOTE(review): listing line 1093 (presumably the declaration of Inf used
// below) is missing here.
1094 FCmpInst::Predicate Pred =
1095 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1096 if (OrderedInvertedMask == fcInf)
1097 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1098
1099 Value *Fabs = Builder.CreateFAbs(Src0);
1100 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1101 CmpInf->takeName(&II);
1102 return replaceInstUsesWith(II, CmpInf);
1103 }
1104
1105 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1106 (IsOrdered || IsUnordered) && !IsStrict) {
1107 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1108 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1109 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1110 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1111 Constant *Inf =
1112 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1113 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1114 : Builder.CreateFCmpOEQ(Src0, Inf);
1115
1116 EqInf->takeName(&II);
1117 return replaceInstUsesWith(II, EqInf);
1118 }
1119
1120 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1121 (IsOrdered || IsUnordered) && !IsStrict) {
1122 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1123 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1124 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1125 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
// NOTE(review): listing line 1126 (the start of the Inf declaration that the
// next line completes) is missing here.
1127 OrderedInvertedMask == fcNegInf);
1128 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1129 : Builder.CreateFCmpONE(Src0, Inf);
1130 NeInf->takeName(&II);
1131 return replaceInstUsesWith(II, NeInf);
1132 }
1133
1134 if (Mask == fcNan && !IsStrict) {
1135 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1136 // exceptions.
1137 Value *IsNan =
1138 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1139 IsNan->takeName(&II);
1140 return replaceInstUsesWith(II, IsNan);
1141 }
1142
1143 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1144 // Equivalent of !isnan. Replace with standard fcmp.
1145 Value *FCmp =
1146 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1147 FCmp->takeName(&II);
1148 return replaceInstUsesWith(II, FCmp);
1149 }
1150
// NOTE(review): listing line 1151 (presumably the declaration of PredType
// assigned below) is missing here.
1152
1153 // Try to replace with an fcmp with 0
1154 //
1155 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1156 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1157 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1158 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1159 //
1160 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1161 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1162 //
1163 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1164 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1165 //
1166 if (!IsStrict && (IsOrdered || IsUnordered) &&
1167 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1168 Src0->getType())) !=
// NOTE(review): listing lines 1169-1170 (the right-hand side of the != above
// and presumably the declaration of Zero used below) are missing here.
1171 // Equivalent of == 0.
1172 Value *FCmp = Builder.CreateFCmp(
1173 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1174 Src0, Zero);
1175
1176 FCmp->takeName(&II);
1177 return replaceInstUsesWith(II, FCmp);
1178 }
1179
1180 KnownFPClass Known =
1181 computeKnownFPClass(Src0, Mask, SQ.getWithInstruction(&II));
1182
1183 // Clear test bits we know must be false from the source value.
1184 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1185 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1186 if ((Mask & Known.KnownFPClasses) != Mask) {
1187 II.setArgOperand(
1188 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
1189 return &II;
1190 }
1191
1192 // If none of the tests which can return false are possible, fold to true.
1193 // fp_class (nnan x), ~(qnan|snan) -> true
1194 // fp_class (ninf x), ~(ninf|pinf) -> true
1195 if (Mask == Known.KnownFPClasses)
1196 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1197
1198 return nullptr;
1199 }
1200
1201static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1202 KnownBits Known = computeKnownBits(Op, SQ);
1203 if (Known.isNonNegative())
1204 return false;
1205 if (Known.isNegative())
1206 return true;
1207
1208 Value *X, *Y;
1209 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1211
1212 return std::nullopt;
1213}
1214
// Like getKnownSign, whose answer is returned directly when it has one.
// Otherwise an `nsw sub` operand gets one further check.
//
// NOTE(review): embedded listing number 1222 is absent, so the statement
// guarded by the m_NSWSub match appears to have been lost in extraction. As
// shown, the `if` would wrongly govern the final return.
1215 static std::optional<bool> getKnownSignOrZero(Value *Op,
1216 const SimplifyQuery &SQ) {
1217 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1218 return Sign;
1219
1220 Value *X, *Y;
1221 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
// NOTE(review): listing line 1222 is missing here.
1223
1224 return std::nullopt;
1225 }
1226
1227/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1228static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1229 const SimplifyQuery &SQ) {
1230 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1231 if (!Known1)
1232 return false;
1233 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1234 if (!Known0)
1235 return false;
1236 return *Known0 == *Known1;
1237}
1238
1239 // Determines if ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b)) is safe.
1240 //
1241 // This is true if, when the add saturates, the resulting ldexp is guaranteed to
1242 // produce 0 or inf.
//
// The check scales the smallest value of the format by the largest exponent
// and the largest value by the smallest exponent, and requires the results to
// be infinity and zero respectively. Formats without an infinity are rejected.
//
// NOTE(review): embedded listing numbers 1254 and 1256 are absent, so the
// final argument and closing of both scalbn calls appear to have been lost in
// extraction.
1243 static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy) {
1244 const fltSemantics &FltSem = FpTy->getScalarType()->getFltSemantics();
1245 if (!APFloat::semanticsHasInf(FltSem))
1246 return false;
1247
1248 // Cap ExpBits at 32 because scalbn takes an int. This is sufficient for any
1249 // reasonable fp type (for example, `double` only has 11 exponent bits).
1250 unsigned ExpBits = std::min(ExpTy->getScalarSizeInBits(), 32u);
1251 int SignedMax = static_cast<int>(maxIntN(ExpBits));
1252 int SignedMin = static_cast<int>(minIntN(ExpBits));
1253 APFloat ScaledUp = scalbn(APFloat::getSmallest(FltSem), SignedMax,
// NOTE(review): listing line 1254 is missing here.
1255 APFloat ScaledDown = scalbn(APFloat::getLargest(FltSem), SignedMin,
// NOTE(review): listing line 1256 is missing here.
1257 return ScaledUp.isInfinity() && ScaledDown.isZero();
1258 }
1259
1260 /// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1261 /// can trigger other combines.
///
/// Requires a one-use add whose no-wrap flag matches the signedness of the
/// min/max (nsw for smin/smax, nuw for umin/umax). Returns the new add, or
/// nullptr when the pattern or the flag requirement is not met.
// NOTE(review): embedded listing number 1262 is absent, so the line carrying
// the return type, function name and first parameter appears to have been lost
// in extraction.
1263 InstCombiner::BuilderTy &Builder) {
1264 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1265 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1266 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1267 "Expected a min or max intrinsic");
1268
1269 // TODO: Match vectors with undef elements, but undef may not propagate.
1270 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1271 Value *X;
1272 const APInt *C0, *C1;
1273 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1274 !match(Op1, m_APInt(C1)))
1275 return nullptr;
1276
1277 // Check for necessary no-wrap and overflow constraints.
1278 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1279 auto *Add = cast<BinaryOperator>(Op0);
1280 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1281 (!IsSigned && !Add->hasNoUnsignedWrap()))
1282 return nullptr;
1283
1284 // If the constant difference overflows, then instsimplify should reduce the
1285 // min/max to the add or C1.
1286 bool Overflow;
1287 APInt CDiff =
1288 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
1289 assert(!Overflow && "Expected simplify of min/max");
1290
1291 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1292 // Note: the "mismatched" no-overflow setting does not propagate.
1293 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1294 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
1295 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1296 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1297 }
1298/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1299Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1300 Type *Ty = MinMax1.getType();
1301
1302 // We are looking for a tree of:
1303 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1304 // Where the min and max could be reversed
1305 Instruction *MinMax2;
1306 BinaryOperator *AddSub;
1307 const APInt *MinValue, *MaxValue;
1308 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
1309 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
1310 return nullptr;
1311 } else if (match(&MinMax1,
1312 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
1313 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
1314 return nullptr;
1315 } else
1316 return nullptr;
1317
1318 // Check that the constants clamp a saturate, and that the new type would be
1319 // sensible to convert to.
1320 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1321 return nullptr;
1322 // In what bitwidth can this be treated as saturating arithmetics?
1323 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1324 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1325 // good first approximation for what should be done there.
1326 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
1327 return nullptr;
1328
1329 // Also make sure that the inner min/max and the add/sub have one use.
1330 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1331 return nullptr;
1332
1333 // Create the new type (which can be a vector type)
1334 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1335
1336 Intrinsic::ID IntrinsicID;
1337 if (AddSub->getOpcode() == Instruction::Add)
1338 IntrinsicID = Intrinsic::sadd_sat;
1339 else if (AddSub->getOpcode() == Instruction::Sub)
1340 IntrinsicID = Intrinsic::ssub_sat;
1341 else
1342 return nullptr;
1343
1344 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1345 // is usually achieved via a sext from a smaller type.
1346 if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
1347 ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
1348 return nullptr;
1349
1350 // Finally create and return the sat intrinsic, truncated to the new type
1351 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
1352 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
1353 Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
1354 return CastInst::Create(Instruction::SExt, Sat, Ty);
1355}
1356
1357
1358 /// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1359 /// can only be one of two possible constant values -- turn that into a select
1360 /// of constants.
///
/// Requires a constant second operand, a one-use first operand that is the
/// opposite min/max with a constant, and the two constants to differ by
/// exactly one in the direction checked per case below.
// NOTE(review): embedded listing numbers 1361 and 1369 are absent, so the line
// carrying the return type, function name and first parameter, and presumably
// the declaration of Pred, appear to have been lost in extraction.
1362 InstCombiner::BuilderTy &Builder) {
1363 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1364 Value *X;
1365 const APInt *C0, *C1;
1366 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1367 return nullptr;
1368
// NOTE(review): listing line 1369 is missing here.
1370 switch (II->getIntrinsicID()) {
1371 case Intrinsic::smax:
1372 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1373 Pred = ICmpInst::ICMP_SGT;
1374 break;
1375 case Intrinsic::smin:
1376 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1377 Pred = ICmpInst::ICMP_SLT;
1378 break;
1379 case Intrinsic::umax:
1380 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1381 Pred = ICmpInst::ICMP_UGT;
1382 break;
1383 case Intrinsic::umin:
1384 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1385 Pred = ICmpInst::ICMP_ULT;
1386 break;
1387 default:
1388 llvm_unreachable("Expected min/max intrinsic");
1389 }
// Pred is left at BAD_ICMP_PREDICATE when no case matched.
1390 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1391 return nullptr;
1392
1393 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1394 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1395 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1396 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1397 }
1398
1399 /// If this min/max has a constant operand and an operand that is a matching
1400 /// min/max with a constant operand, constant-fold the 2 constant operands.
///
/// Returns the new intrinsic call built with the inner min/max's ID, or
/// nullptr when the operands do not have the required shape.
// NOTE(review): embedded listing numbers 1401 and 1425 are absent, so the line
// carrying the return type, function name and first parameter, and presumably
// the declaration of Pred, appear to have been lost in extraction.
1402 IRBuilderBase &Builder,
1403 const SimplifyQuery &SQ) {
1404 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1405 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1406 if (!LHS)
1407 return nullptr;
1408
1409 Constant *C0, *C1;
1410 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1411 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1412 return nullptr;
1413
1414 // max (max X, C0), C1 --> max X, (max C0, C1)
1415 // min (min X, C0), C1 --> min X, (min C0, C1)
1416 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1417 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
// A differing inner ID is accepted only for the two mixed-signedness pairs
// above, and only when both constants are known non-negative.
1418 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1419 if (InnerMinMaxID != MinMaxID &&
1420 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1421 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1422 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1423 return nullptr;
1424
// NOTE(review): listing line 1425 is missing here.
1426 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1427 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
1428 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1429 {LHS->getArgOperand(0), NewC});
1430 }
1431
1432 /// If this min/max has a matching min/max operand with a constant, try to push
1433 /// the constant operand into this instruction. This can enable more folds.
// NOTE(review): the embedded listing numbers skip 1435, 1441, 1443, 1451 and
// 1455 inside this definition. The line with the function name and first
// parameter, two lines of the match expression, the tail of the bail-out
// condition, and the start of the MinMax declaration therefore appear to have
// been lost in extraction. Each gap is marked below; restore them from
// upstream before building.
1434 static Instruction *
1436 InstCombiner::BuilderTy &Builder) {
1437 // Match and capture a min/max operand candidate.
1438 Value *X, *Y;
1439 Constant *C;
1440 Instruction *Inner;
// NOTE(review): listing line 1441 is missing here.
1442 m_Instruction(Inner),
// NOTE(review): listing line 1443 is missing here.
1444 m_Value(Y))))
1445 return nullptr;
1446
1447 // The inner op must match. Check for constants to avoid infinite loops.
1448 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1449 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1450 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
// NOTE(review): listing line 1451 is missing here.
1452 return nullptr;
1453
1454 // max (max X, C), Y --> max (max X, Y), C
// NOTE(review): listing line 1455 is missing here.
1456 MinMaxID, II->getType());
1457 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1458 NewInner->takeName(Inner);
1459 return CallInst::Create(MinMax, {NewInner, C});
1460 }
1461
1462 /// Reduce a sequence of min/max intrinsics with a common operand.
///
/// Both operands must be the same min/max intrinsic as \p II, at least one of
/// them with a single use, and they must share an operand. The result reuses
/// one of the existing inner calls and pairs it with the remaining operand.
// NOTE(review): embedded listing number 1463 is absent, so the signature line
// of this definition appears to have been lost in extraction.
1464 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1465 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1466 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1467 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1468 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1469 RHS->getIntrinsicID() != MinMaxID ||
1470 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1471 return nullptr;
1472
1473 Value *A = LHS->getArgOperand(0);
1474 Value *B = LHS->getArgOperand(1);
1475 Value *C = RHS->getArgOperand(0);
1476 Value *D = RHS->getArgOperand(1);
1477
1478 // Look for a common operand.
1479 Value *MinMaxOp = nullptr;
1480 Value *ThirdOp = nullptr;
1481 if (LHS->hasOneUse()) {
1482 // If the LHS is only used in this chain and the RHS is used outside of it,
1483 // reuse the RHS min/max because that will eliminate the LHS.
1484 if (D == A || C == A) {
1485 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1486 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1487 MinMaxOp = RHS;
1488 ThirdOp = B;
1489 } else if (D == B || C == B) {
1490 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1491 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1492 MinMaxOp = RHS;
1493 ThirdOp = A;
1494 }
1495 } else {
1496 assert(RHS->hasOneUse() && "Expected one-use operand");
1497 // Reuse the LHS. This will eliminate the RHS.
1498 if (D == A || D == B) {
1499 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1500 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1501 MinMaxOp = LHS;
1502 ThirdOp = C;
1503 } else if (C == A || C == B) {
1504 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1505 // min(min(a, b), min(a, d)) --> min(min(a, b), d)
1506 MinMaxOp = LHS;
1507 ThirdOp = D;
1508 }
1509 }
1510
1511 if (!MinMaxOp || !ThirdOp)
1512 return nullptr;
1513
1514 Module *Mod = II->getModule();
1515 Function *MinMax =
1516 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1517 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1518 }
1519
1520 /// If all arguments of the intrinsic are unary shuffles with the same mask,
1521 /// try to shuffle after the intrinsic.
///
/// Constant arguments are accepted when unshuffleConstant can produce a
/// pre-shuffle constant for them, and arguments that the intrinsic takes as
/// scalars are passed through unchanged.
// NOTE(review): embedded listing numbers 1522, 1523 and 1547 are absent, so the
// signature lines and presumably the declaration of NewArgs appear to have
// been lost in extraction.
1524 if (!II->getType()->isVectorTy() ||
1525 !isTriviallyVectorizable(II->getIntrinsicID()) ||
1526 !II->getCalledFunction()->isSpeculatable())
1527 return nullptr;
1528
1529 Value *X;
1530 Constant *C;
1531 ArrayRef<int> Mask;
// Find the first argument that is neither a constant nor a scalar operand of
// the intrinsic; it must be a shuffle with a poison second operand, and its
// mask becomes the reference mask.
1532 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1533 return isa<Constant>(Arg.get()) ||
1534 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1535 Arg.getOperandNo(), nullptr);
1536 });
1537 if (!NonConstArg ||
1538 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1539 return nullptr;
1540
1541 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1542 // instructions.
1543 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1544 return nullptr;
1545
1546 // See if all arguments are shuffled with the same mask.
// NOTE(review): listing line 1547 is missing here.
1548 Type *SrcTy = X->getType();
1549 for (Use &Arg : II->args()) {
1550 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1551 Arg.getOperandNo(), nullptr))
1552 NewArgs.push_back(Arg);
1553 else if (match(&Arg,
1554 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1555 X->getType() == SrcTy)
1556 NewArgs.push_back(X);
1557 else if (match(&Arg, m_ImmConstant(C))) {
1558 // If it's a constant, try find the constant that would be shuffled to C.
1559 if (Constant *ShuffledC =
1560 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1561 NewArgs.push_back(ShuffledC);
1562 else
1563 return nullptr;
1564 } else
1565 return nullptr;
1566 }
1567
1568 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
// FPI is handed to CreateIntrinsic only when II is a floating-point math
// operator.
1569 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1570 // Result type might be a different vector width.
1571 // TODO: Check that the result type isn't widened?
1572 VectorType *ResTy =
1573 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1574 Value *NewIntrinsic =
1575 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1576 return new ShuffleVectorInst(NewIntrinsic, Mask);
1577 }
1578
1579 /// If all arguments of the intrinsic are reverses, try to pull the reverse
1580 /// after the intrinsic.
///
/// Scalar operands of the intrinsic and splat values are passed through
/// unchanged, while other immediate constants are reversed up front.
// NOTE(review): embedded listing number 1581 is absent, so the signature line
// of this definition appears to have been lost in extraction.
1582 if (!II->getType()->isVectorTy() ||
1583 !isTriviallyVectorizable(II->getIntrinsicID()))
1584 return nullptr;
1585
1586 // At least 1 operand must be a reverse with 1 use because we are creating 2
1587 // instructions.
1588 if (none_of(II->args(), [](Value *V) {
1589 return match(V, m_OneUse(m_VecReverse(m_Value())));
1590 }))
1591 return nullptr;
1592
1593 Value *X;
1594 Constant *C;
1595 SmallVector<Value *> NewArgs;
1596 for (Use &Arg : II->args()) {
1597 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1598 Arg.getOperandNo(), nullptr))
1599 NewArgs.push_back(Arg);
1600 else if (match(&Arg, m_VecReverse(m_Value(X))))
1601 NewArgs.push_back(X);
1602 else if (isSplatValue(Arg))
1603 NewArgs.push_back(Arg);
1604 else if (match(&Arg, m_ImmConstant(C)))
1605 NewArgs.push_back(Builder.CreateVectorReverse(C));
1606 else
1607 return nullptr;
1608 }
1609
1610 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1611 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1612 Value *NewIntrinsic = Builder.CreateIntrinsic(
1613 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1614 return Builder.CreateVectorReverse(NewIntrinsic);
1615 }
1616
1617 /// Fold the following cases and accepts bswap and bitreverse intrinsics:
1618 /// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1619 /// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
// NOTE(review): the embedded listing numbers skip 1621, 1629, 1630 and 1632
// inside this definition. The line with the return type, function name and
// first parameter, the `if` that matches the logic op and opens the scope
// closed at listing line 1652, and presumably the declaration of Op therefore
// appear to have been lost in extraction.
1620 template <Intrinsic::ID IntrID>
1622 InstCombiner::BuilderTy &Builder) {
1623 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1624 "This helper only supports BSWAP and BITREVERSE intrinsics");
1625
1626 Value *X, *Y;
1627 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1628 // don't match ConstantExpr that aren't meaningful for this transform.
// NOTE(review): listing lines 1629-1630 are missing here.
1631 Value *OldReorderX, *OldReorderY;
// NOTE(review): listing line 1632 is missing here.
1633
1634 // If both X and Y are bswap/bitreverse, the transform reduces the number
1635 // of instructions even if there's multiuse.
1636 // If only one operand is bswap/bitreverse, we need to ensure the operand
1637 // have only one use.
1638 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1639 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1640 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1641 }
1642
1643 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1644 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1645 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1646 }
1647
1648 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1649 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1650 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1651 }
1652 }
1653 return nullptr;
1654 }
1655
1656 /// Helper to match idempotent binary intrinsics, namely, intrinsics where
1657 /// `f(f(x, y), y) == f(x, y)` holds.
///
/// Returns true for the integer and floating-point min/max intrinsic IDs
/// listed below and false for every other ID.
// NOTE(review): embedded listing number 1658 is absent, so the signature line
// of this definition appears to have been lost in extraction. The switch below
// reads a value named IID.
1659 switch (IID) {
1660 case Intrinsic::smax:
1661 case Intrinsic::smin:
1662 case Intrinsic::umax:
1663 case Intrinsic::umin:
1664 case Intrinsic::maximum:
1665 case Intrinsic::minimum:
1666 case Intrinsic::maximumnum:
1667 case Intrinsic::minimumnum:
1668 case Intrinsic::maxnum:
1669 case Intrinsic::minnum:
1670 return true;
1671 default:
1672 return false;
1673 }
1674 }
1675
1676 /// Attempt to simplify value-accumulating recurrences of kind:
1677 /// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1678 /// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1679 /// And let the idempotent binary intrinsic be hoisted, when the operands are
1680 /// known to be loop-invariant.
///
/// Returns the newly built `intrinsic(Init, OtherOp)` value, with fast-math
/// flags copied from \p II when it is a floating-point operation, or nullptr
/// when the preconditions fail.
// NOTE(review): embedded listing numbers 1681 and 1690 are absent, so the line
// with the return type, function name and first parameter, and the middle term
// of the bail-out condition (presumably the one binding PN, Init and OtherOp),
// appear to have been lost in extraction.
1682 IntrinsicInst *II) {
1683 PHINode *PN;
1684 Value *Init, *OtherOp;
1685
1686 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1687 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1688 auto IID = II->getIntrinsicID();
1689 if (!isIdempotentBinaryIntrinsic(IID) ||
// NOTE(review): listing line 1690 is missing here.
1691 !IC.getDominatorTree().dominates(OtherOp, PN))
1692 return nullptr;
1693
1694 auto *InvariantBinaryInst =
1695 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
1696 if (isa<FPMathOperator>(InvariantBinaryInst))
1697 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1698 return InvariantBinaryInst;
1699 }
1700
1701static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1702 if (!CanReorderLanes)
1703 return nullptr;
1704
1705 Value *V;
1706 if (match(Arg, m_VecReverse(m_Value(V))))
1707 return V;
1708
1709 ArrayRef<int> Mask;
1710 if (!isa<FixedVectorType>(Arg->getType()) ||
1711 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1712 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1713 return nullptr;
1714
1715 int Sz = Mask.size();
1716 SmallBitVector UsedIndices(Sz);
1717 for (int Idx : Mask) {
1718 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1719 return nullptr;
1720 UsedIndices.set(Idx);
1721 }
1722
1723 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1724 // other changes.
1725 return UsedIndices.all() ? V : nullptr;
1726}
1727
1728 /// Fold an unsigned minimum of trailing or leading zero bits counts:
1729 /// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
1730 /// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
1731 /// >> ConstOp))
1732 /// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
1733 /// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
///
/// \p I0 must be a one-use cttz/ctlz call. \p I1 is either another one-use
/// call of the same intrinsic or a constant whose elements are all below the
/// bit width. Returns the new intrinsic call, or nullptr otherwise.
// NOTE(review): embedded listing numbers 1736 and 1762 are absent, so the line
// with the function name and leading parameters, and the start of the NewConst
// declaration that listing lines 1763-1767 complete, appear to have been lost
// in extraction.
1734 template <Intrinsic::ID IntrID>
1735 static Value *
1737 const DataLayout &DL,
1738 InstCombiner::BuilderTy &Builder) {
1739 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1740 "This helper only supports cttz and ctlz intrinsics");
1741
1742 Value *CtOp1, *CtOp2;
1743 Value *ZeroUndef1, *ZeroUndef2;
1744 if (!match(I0, m_OneUse(
1745 m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
1746 return nullptr;
1747
// Two counts: OR both the counted values and the two second arguments.
1748 if (match(I1,
1749 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
1750 return Builder.CreateBinaryIntrinsic(
1751 IntrID, Builder.CreateOr(CtOp1, CtOp2),
1752 Builder.CreateOr(ZeroUndef1, ZeroUndef2));
1753
1754 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1755 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1756 if (!match(I1, m_CheckedInt(LessBitWidth)))
1757 // We have a constant >= BitWidth (which can be handled by CVP)
1758 // or a non-splat vector with elements < and >= BitWidth
1759 return nullptr;
1760
1761 Type *Ty = I1->getType();
// NOTE(review): listing line 1762 is missing here.
1763 IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1764 IntrID == Intrinsic::cttz
1765 ? ConstantInt::get(Ty, 1)
1766 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1767 cast<Constant>(I1), DL);
// The second argument of the new call is the constant true.
1768 return Builder.CreateBinaryIntrinsic(
1769 IntrID, Builder.CreateOr(CtOp1, NewConst),
1770 ConstantInt::getTrue(ZeroUndef1->getType()));
1771 }
1772
1773 /// Return whether "X LOp (Y ROp Z)" is always equal to
1774 /// "(X LOp Y) ROp (X LOp Z)".
///
/// LOp is the binary opcode and ROp the min/max intrinsic ID. HasNUW / HasNSW
/// say whether the no-unsigned-wrap / no-signed-wrap flag may be relied on.
/// The answer is true only for:
///   - umax/umin with HasNUW and LOp being Add or Shl;
///   - smax/smin with HasNSW and LOp being Add.
/// Every other combination (including any other ROp) yields false.
///
/// NOTE(review): the first line of the signature (name plus the `LOp` and
/// `HasNUW` parameters) is missing from this listing -- the embedded numbering
/// jumps from 1774 to 1776. The name is taken from the calls further down.
1776 bool HasNSW, Intrinsic::ID ROp) {
1777 switch (ROp) {
// Unsigned min/max: requires nuw on the binary operator.
1778 case Intrinsic::umax:
1779 case Intrinsic::umin:
1780 if (HasNUW && LOp == Instruction::Add)
1781 return true;
1782 if (HasNUW && LOp == Instruction::Shl)
1783 return true;
1784 return false;
// Signed min/max: only an nsw add qualifies.
1785 case Intrinsic::smax:
1786 case Intrinsic::smin:
1787 return HasNSW && LOp == Instruction::Add;
1788 default:
1789 return false;
1790 }
1791 }
1792
1793 /// Return whether "(X ROp Y) LOp Z" is always equal to
1794 /// "(X LOp Z) ROp (Y LOp Z)".
///
/// Commutative opcodes and Shl reuse the answer of leftDistributesOverRight.
/// For the remaining opcodes only Sub qualifies: with HasNUW under umax/umin,
/// or with HasNSW under smax/smin. Any other ROp yields false.
///
/// NOTE(review): the first line of the signature (name plus the `LOp` and
/// `HasNUW` parameters) is missing from this listing -- the embedded numbering
/// jumps from 1794 to 1796. The name is taken from the call further down.
1796 bool HasNSW, Intrinsic::ID ROp) {
// Same rule as the left-distribution query for commutative opcodes and Shl.
1797 if (Instruction::isCommutative(LOp) || LOp == Instruction::Shl)
1798 return leftDistributesOverRight(LOp, HasNUW, HasNSW, ROp);
1799 switch (ROp) {
1800 case Intrinsic::umax:
1801 case Intrinsic::umin:
1802 return HasNUW && LOp == Instruction::Sub;
1803 case Intrinsic::smax:
1804 case Intrinsic::smin:
1805 return HasNSW && LOp == Instruction::Sub;
1806 default:
1807 return false;
1808 }
1809 }
1810
1811 // Attempts to factorise a common term
1812 // in an instruction that has the form "(A op' B) op (C op' D)",
1813 // where op is an intrinsic and op' is a binop.
//
// Both intrinsic operands must be single-use binary operators with the same
// opcode. When they share an operand (A == C or B == D, after an optional swap
// for commutative opcodes) and the matching distributivity query succeeds, the
// intrinsic is applied to the two differing operands and the binop is applied
// once to that result and the shared operand.
//
// Returns the new binary operator, or nullptr when the pattern does not match.
//
// NOTE(review): the listing line holding the function name and the leading
// `II` parameter is missing (numbering jumps 1814 -> 1816); so are lines
// 1820-1821, which presumably declare Op0 / Op1 as the BinaryOperator views of
// LHS / RHS. Confirm against upstream.
1814 static Value *
1816 InstCombiner::BuilderTy &Builder) {
1817 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1818 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1819
1822
1823 if (!Op0 || !Op1)
1824 return nullptr;
1825
1826 if (Op0->getOpcode() != Op1->getOpcode())
1827 return nullptr;
1828
1829 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1830 return nullptr;
1831
// A wrap flag counts only when both inner operators carry it.
1832 Instruction::BinaryOps InnerOpcode =
1833 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
1834 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1835 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1836
1837 Value *A = Op0->getOperand(0);
1838 Value *B = Op0->getOperand(1);
1839 Value *C = Op1->getOperand(0);
1840 Value *D = Op1->getOperand(1);
1841
1842 // Attempts to swap variables such that A equals C or B equals D,
1843 // if the inner operation is commutative.
// If no operand is shared even crosswise, there is nothing to factor out.
1844 if (Op0->isCommutative() && A != C && B != D) {
1845 if (A == D || B == C)
1846 std::swap(C, D);
1847 else
1848 return nullptr;
1849 }
1850
1851 BinaryOperator *NewBinop;
// Shared left operand:  (A op' B) op (A op' D) --> A op' (B op D)
1852 if (A == C &&
1853 leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode)) {
1854 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1855 NewBinop =
1856 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
// Shared right operand: (A op' B) op (C op' B) --> (A op C) op' B
1857 } else if (B == D && rightDistributesOverLeft(InnerOpcode, HasNUW, HasNSW,
1858 TopLevelOpcode)) {
1859 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1860 NewBinop =
1861 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1862 } else {
1863 return nullptr;
1864 }
1865
// The factored operator carries the wrap flags common to both original ones.
1866 NewBinop->setHasNoUnsignedWrap(HasNUW);
1867 NewBinop->setHasNoSignedWrap(HasNSW);
1868
1869 return NewBinop;
1870 }
1871
// Rewrites a two-operand shift intrinsic call II whose shift amount (operand 1)
// is a constant into a plain IR shift of operand 0:
//   - every amount in [0, ElemBits)    --> shl
//   - every amount in (-ElemBits, 0)   --> ashr for arm_neon_vshifts /
//                                          aarch64_neon_sshl, lshr otherwise,
//                                          shifting by the negated amount
// Mixed-sign vectors, out-of-range amounts and elements that are not
// ConstantInt make the helper return nullptr.
//
// NOTE(review): the signature line (listing line 1872) is missing here; the
// body uses a parameter `II` and a parameter `IC` that provides Builder and
// replaceInstUsesWith. Confirm name and types against upstream.
1873 Value *Arg0 = II->getArgOperand(0);
1874 auto *ShiftConst = dyn_cast<Constant>(II->getArgOperand(1));
1875 if (!ShiftConst)
1876 return nullptr;
1877
1878 int ElemBits = Arg0->getType()->getScalarSizeInBits();
// Despite the name, AllPositive tracks "no negative amount seen so far", so a
// zero amount counts as positive.
1879 bool AllPositive = true;
1880 bool AllNegative = true;
1881
// Validates one shift amount and updates the two sign trackers. Returns false
// as soon as the amount is not a ConstantInt (including a null element), is out
// of range, or disagrees in sign with an amount checked earlier.
1882 auto Check = [&](Constant *C) -> bool {
1883 if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
1884 const APInt &V = CI->getValue();
1885 if (V.isNonNegative()) {
1886 AllNegative = false;
1887 return AllPositive && V.ult(ElemBits);
1888 }
1889 AllPositive = false;
1890 return AllNegative && V.sgt(-ElemBits);
1891 }
1892 return false;
1893 };
1894
// Fixed vectors are checked element by element; anything else is checked as a
// single constant.
1895 if (auto *VTy = dyn_cast<FixedVectorType>(Arg0->getType())) {
1896 for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
1897 if (!Check(ShiftConst->getAggregateElement(I)))
1898 return nullptr;
1899 }
1900
1901 } else if (!Check(ShiftConst))
1902 return nullptr;
1903
1904 IRBuilderBase &B = IC.Builder;
// Non-negative amounts: a left shift by the constant as-is.
1905 if (AllPositive)
1906 return IC.replaceInstUsesWith(*II, B.CreateShl(Arg0, ShiftConst));
1907
// Negative amounts: shift right by the negated constant, arithmetic for the two
// signed intrinsic IDs and logical for any other ID reaching this point.
1908 Value *NegAmt = B.CreateNeg(ShiftConst);
1909 Intrinsic::ID IID = II->getIntrinsicID();
1910 const bool IsSigned =
1911 IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
1912 Value *Result =
1913 IsSigned ? B.CreateAShr(Arg0, NegAmt) : B.CreateLShr(Arg0, NegAmt);
1914 return IC.replaceInstUsesWith(*II, Result);
1915 }
1916
1917/// CallInst simplification. This mostly only handles folding of intrinsic
1918/// instructions. For normal calls, it allows visitCallBase to do the heavy
1919/// lifting.
1921 // Don't try to simplify calls without uses. It will not do anything useful,
1922 // but will result in the following folds being skipped.
1923 if (!CI.use_empty()) {
1924 SmallVector<Value *, 8> Args(CI.args());
1925 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1926 SQ.getWithInstruction(&CI)))
1927 return replaceInstUsesWith(CI, V);
1928 }
1929
1930 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1931 return visitFree(CI, FreedOp);
1932
1933 // If the caller function (i.e. us, the function that contains this CallInst)
1934 // is nounwind, mark the call as nounwind, even if the callee isn't.
1935 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1936 CI.setDoesNotThrow();
1937 return &CI;
1938 }
1939
1941 if (!II)
1942 return visitCallBase(CI);
1943
1944 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1945 // instead of in visitCallBase.
1946 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1947 if (auto NumBytes = MI->getLengthInBytes()) {
1948 // memmove/cpy/set of zero bytes is a noop.
1949 if (NumBytes->isZero())
1950 return eraseInstFromFunction(CI);
1951
1952 // For atomic unordered mem intrinsics if len is not a positive or
1953 // not a multiple of element size then behavior is undefined.
1954 if (MI->isAtomic() &&
1955 (NumBytes->isNegative() ||
1956 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1958 assert(MI->getType()->isVoidTy() &&
1959 "non void atomic unordered mem intrinsic");
1960 return eraseInstFromFunction(*MI);
1961 }
1962 }
1963
1964 // No other transformations apply to volatile transfers.
1965 if (MI->isVolatile())
1966 return nullptr;
1967
1969 // memmove(x,x,size) -> noop.
1970 if (MTI->getSource() == MTI->getDest())
1971 return eraseInstFromFunction(CI);
1972 }
1973
1974 auto IsPointerUndefined = [MI](Value *Ptr) {
1975 return isa<ConstantPointerNull>(Ptr) &&
1977 MI->getFunction(),
1978 cast<PointerType>(Ptr->getType())->getAddressSpace());
1979 };
1980 bool SrcIsUndefined = false;
1981 // If we can determine a pointer alignment that is bigger than currently
1982 // set, update the alignment.
1983 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1985 return I;
1986 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1987 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1988 if (Instruction *I = SimplifyAnyMemSet(MSI))
1989 return I;
1990 }
1991
1992 // If src/dest is null, this memory intrinsic must be a noop.
1993 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1994 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1995 return eraseInstFromFunction(CI);
1996 }
1997
1998 // If we have a memmove and the source operation is a constant global,
1999 // then the source and dest pointers can't alias, so we can change this
2000 // into a call to memcpy.
2001 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
2002 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
2003 if (GVSrc->isConstant()) {
2004 Module *M = CI.getModule();
2005 Intrinsic::ID MemCpyID =
2006 MMI->isAtomic()
2007 ? Intrinsic::memcpy_element_unordered_atomic
2008 : Intrinsic::memcpy;
2009 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
2010 CI.getArgOperand(1)->getType(),
2011 CI.getArgOperand(2)->getType() };
2013 Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
2014 return II;
2015 }
2016 }
2017 }
2018
2019 // For fixed width vector result intrinsics, use the generic demanded vector
2020 // support.
2021 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
2022 auto VWidth = IIFVTy->getNumElements();
2023 APInt PoisonElts(VWidth, 0);
2024 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2025 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
2026 if (V != II)
2027 return replaceInstUsesWith(*II, V);
2028 return II;
2029 }
2030 }
2031
2032 if (II->isCommutative()) {
2033 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
2034 replaceOperand(*II, 0, Pair->first);
2035 replaceOperand(*II, 1, Pair->second);
2036 II->dropPoisonGeneratingAnnotations();
2037 II->dropUBImplyingAttrsAndMetadata();
2038 return II;
2039 }
2040
2041 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
2042 return NewCall;
2043 }
2044
2045 // Unused constrained FP intrinsic calls may have declared side effect, which
2046 // prevents it from being removed. In some cases however the side effect is
2047 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
2048 // returns a replacement, the call may be removed.
2049 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
2050 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
2051 return eraseInstFromFunction(CI);
2052 }
2053
2054 Intrinsic::ID IID = II->getIntrinsicID();
2055 switch (IID) {
2056 case Intrinsic::objectsize: {
2057 SmallVector<Instruction *> InsertedInstructions;
2058 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
2059 &InsertedInstructions)) {
2060 for (Instruction *Inserted : InsertedInstructions)
2061 Worklist.add(Inserted);
2062 return replaceInstUsesWith(CI, V);
2063 }
2064 return nullptr;
2065 }
2066 case Intrinsic::abs: {
2067 Value *IIOperand = II->getArgOperand(0);
2068 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
2069
2070 // abs(-x) -> abs(x)
2071 Value *X;
2072 if (match(IIOperand, m_Neg(m_Value(X))))
2073 return CallInst::Create(
2074 II->getCalledFunction(),
2075 {X,
2076 Builder.getInt1(IntMinIsPoison ||
2077 cast<Instruction>(IIOperand)->hasNoSignedWrap())});
2078
2079 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
2080 return CallInst::Create(II->getCalledFunction(),
2081 {X, II->getArgOperand(1)});
2082
2083 Value *Y;
2084 // abs(a * abs(b)) -> abs(a * b)
2085 if (match(IIOperand,
2088 bool NSW =
2089 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2090 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
2091 return CallInst::Create(II->getCalledFunction(),
2092 {XY, II->getArgOperand(1)});
2093 }
2094
2095 if (std::optional<bool> Known =
2096 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
2097 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
2098 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
2099 if (!*Known)
2100 return replaceInstUsesWith(*II, IIOperand);
2101
2102 // abs(x) -> -x if x < 0
2103 // abs(x) -> -x if x < = 0 (include abs(x-y) --> y - x where x <= y)
2104 if (IntMinIsPoison)
2105 return BinaryOperator::CreateNSWNeg(IIOperand);
2106 return BinaryOperator::CreateNeg(IIOperand);
2107 }
2108
2109 // abs (sext X) --> zext (abs X*)
2110 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2111 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
2112 Value *NarrowAbs =
2113 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
2114 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
2115 }
2116
2117 // Match a complicated way to check if a number is odd/even:
2118 // abs (srem X, 2) --> and X, 1
2119 const APInt *C;
2120 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
2121 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
2122
2123 break;
2124 }
2125 case Intrinsic::umin: {
2126 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2127 // umin(x, 1) == zext(x != 0)
2128 if (match(I1, m_One())) {
2129 assert(II->getType()->getScalarSizeInBits() != 1 &&
2130 "Expected simplify of umin with max constant");
2131 Value *Zero = Constant::getNullValue(I0->getType());
2132 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
2133 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
2134 }
2135 // umin(cttz(x), const) --> cttz(x | (1 << const))
2136 if (Value *FoldedCttz =
2138 I0, I1, DL, Builder))
2139 return replaceInstUsesWith(*II, FoldedCttz);
2140 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2141 if (Value *FoldedCtlz =
2143 I0, I1, DL, Builder))
2144 return replaceInstUsesWith(*II, FoldedCtlz);
2145 [[fallthrough]];
2146 }
2147 case Intrinsic::umax: {
2148 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2149 Value *X, *Y;
2150 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
2151 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2152 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2153 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2154 }
2155 Constant *C;
2156 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2157 I0->hasOneUse()) {
2158 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
2159 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2160 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2161 }
2162 }
2163 // If C is not 0:
2164 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2165 // If C is not 0 or 1:
2166 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
2167 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2168 const APInt *C;
2169 Value *X;
2170 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
2171 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
2172 return nullptr;
2173 if (C->isZero())
2174 return nullptr;
2175 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
2176 return nullptr;
2177
2178 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
2179 Value *NewSelect = nullptr;
2180 NewSelect = Builder.CreateSelectWithUnknownProfile(
2181 Cmp, ConstantInt::get(X->getType(), 1), A, DEBUG_TYPE);
2182 return replaceInstUsesWith(*II, NewSelect);
2183 };
2184
2185 if (IID == Intrinsic::umax) {
2186 if (Instruction *I = foldMaxMulShift(I0, I1))
2187 return I;
2188 if (Instruction *I = foldMaxMulShift(I1, I0))
2189 return I;
2190 }
2191
2192 // If both operands of unsigned min/max are sign-extended, it is still ok
2193 // to narrow the operation.
2194 [[fallthrough]];
2195 }
2196 case Intrinsic::smax:
2197 case Intrinsic::smin: {
2198 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2199 Value *X, *Y;
2200 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
2201 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2202 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2203 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2204 }
2205
2206 Constant *C;
2207 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2208 I0->hasOneUse()) {
2209 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2210 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2211 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2212 }
2213 }
2214
2215 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2216 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2217 const APInt *MinC, *MaxC;
2218 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2219 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2220 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2221 Value *NewMax = Builder.CreateBinaryIntrinsic(
2222 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2223 return replaceInstUsesWith(
2224 *II, Builder.CreateBinaryIntrinsic(
2225 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2226 };
2227 if (IID == Intrinsic::smax &&
2229 m_APInt(MinC)))) &&
2230 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2231 return CreateCanonicalClampForm(true);
2232 if (IID == Intrinsic::umax &&
2234 m_APInt(MinC)))) &&
2235 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2236 return CreateCanonicalClampForm(false);
2237
2238 // umin(i1 X, i1 Y) -> and i1 X, Y
2239 // smax(i1 X, i1 Y) -> and i1 X, Y
2240 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2241 II->getType()->isIntOrIntVectorTy(1)) {
2242 return BinaryOperator::CreateAnd(I0, I1);
2243 }
2244
2245 // umax(i1 X, i1 Y) -> or i1 X, Y
2246 // smin(i1 X, i1 Y) -> or i1 X, Y
2247 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2248 II->getType()->isIntOrIntVectorTy(1)) {
2249 return BinaryOperator::CreateOr(I0, I1);
2250 }
2251
2252 // smin(smax(X, -1), 1) -> scmp(X, 0)
2253 // smax(smin(X, 1), -1) -> scmp(X, 0)
2254 // At this point, smax(smin(X, 1), -1) is changed to smin(smax(X, -1)
2255 // And i1's have been changed to and/ors
2256 // So we only need to check for smin
2257 if (IID == Intrinsic::smin) {
2258 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2259 match(I1, m_One())) {
2260 Value *Zero = ConstantInt::get(X->getType(), 0);
2261 return replaceInstUsesWith(
2262 CI,
2263 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2264 }
2265 }
2266
2267 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2268 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2269 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2270 // TODO: Canonicalize neg after min/max if I1 is constant.
2271 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2272 (I0->hasOneUse() || I1->hasOneUse())) {
2274 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2275 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2276 }
2277 }
2278
2279 // (umax X, (xor X, Pow2))
2280 // -> (or X, Pow2)
2281 // (umin X, (xor X, Pow2))
2282 // -> (and X, ~Pow2)
2283 // (smax X, (xor X, Pos_Pow2))
2284 // -> (or X, Pos_Pow2)
2285 // (smin X, (xor X, Pos_Pow2))
2286 // -> (and X, ~Pos_Pow2)
2287 // (smax X, (xor X, Neg_Pow2))
2288 // -> (and X, ~Neg_Pow2)
2289 // (smin X, (xor X, Neg_Pow2))
2290 // -> (or X, Neg_Pow2)
2291 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2292 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2293 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2294 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2295 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2296
2297 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2298 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2299 if (KnownSign == std::nullopt) {
2300 UseOr = false;
2301 UseAndN = false;
2302 } else if (*KnownSign /* true is Signed. */) {
2303 UseOr ^= true;
2304 UseAndN ^= true;
2305 Type *Ty = I0->getType();
2306 // Negative power of 2 must be IntMin. It's possible to be able to
2307 // prove negative / power of 2 without actually having known bits, so
2308 // just get the value by hand.
2310 Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2311 }
2312 }
2313 if (UseOr)
2314 return BinaryOperator::CreateOr(I0, X);
2315 else if (UseAndN)
2316 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2317 }
2318
2319 // If we can eliminate ~A and Y is free to invert:
2320 // max ~A, Y --> ~(min A, ~Y)
2321 //
2322 // Examples:
2323 // max ~A, ~Y --> ~(min A, Y)
2324 // max ~A, C --> ~(min A, ~C)
2325 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2326 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2327 Value *A;
2328 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2329 !isFreeToInvert(A, A->hasOneUse())) {
2330 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2332 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2333 return BinaryOperator::CreateNot(InvMaxMin);
2334 }
2335 }
2336 return nullptr;
2337 };
2338
2339 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2340 return I;
2341 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2342 return I;
2343
2345 return I;
2346
2347 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2348 const APInt *RHSC;
2349 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2350 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2351 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2352 ConstantInt::get(II->getType(), *RHSC));
2353
2354 // smax(X, -X) --> abs(X)
2355 // smin(X, -X) --> -abs(X)
2356 // umax(X, -X) --> -abs(X)
2357 // umin(X, -X) --> abs(X)
2358 if (isKnownNegation(I0, I1)) {
2359 // We can choose either operand as the input to abs(), but if we can
2360 // eliminate the only use of a value, that's better for subsequent
2361 // transforms/analysis.
2362 if (I0->hasOneUse() && !I1->hasOneUse())
2363 std::swap(I0, I1);
2364
2365 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2366 // operation and potentially its negation.
2367 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2368 Value *Abs = Builder.CreateBinaryIntrinsic(
2369 Intrinsic::abs, I0,
2370 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2371
2372 // We don't have a "nabs" intrinsic, so negate if needed based on the
2373 // max/min operation.
2374 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2375 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2376 return replaceInstUsesWith(CI, Abs);
2377 }
2378
2380 return Sel;
2381
2382 if (Instruction *SAdd = matchSAddSubSat(*II))
2383 return SAdd;
2384
2385 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2386 return replaceInstUsesWith(*II, NewMinMax);
2387
2389 return R;
2390
2391 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2392 return NewMinMax;
2393
2394 // Try to fold minmax with constant RHS based on range information
2395 if (match(I1, m_APIntAllowPoison(RHSC))) {
2396 ICmpInst::Predicate Pred =
2398 bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2400 I0, IsSigned, SQ.getWithInstruction(II));
2401 if (!LHS_CR.isFullSet()) {
2402 if (LHS_CR.icmp(Pred, *RHSC))
2403 return replaceInstUsesWith(*II, I0);
2404 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2405 return replaceInstUsesWith(*II,
2406 ConstantInt::get(II->getType(), *RHSC));
2407 }
2408 }
2409
2411 return replaceInstUsesWith(*II, V);
2412
2413 break;
2414 }
2415 case Intrinsic::scmp: {
2416 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2417 Value *LHS, *RHS;
2418 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2419 return replaceInstUsesWith(
2420 CI,
2421 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2422 break;
2423 }
2424 case Intrinsic::bitreverse: {
2425 Value *IIOperand = II->getArgOperand(0);
2426 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2427 Value *X;
2428 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2429 X->getType()->isIntOrIntVectorTy(1)) {
2430 Type *Ty = II->getType();
2431 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2432 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2434 }
2435
2436 if (Instruction *crossLogicOpFold =
2438 return crossLogicOpFold;
2439
2440 break;
2441 }
2442 case Intrinsic::bswap: {
2443 Value *IIOperand = II->getArgOperand(0);
2444
2445 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2446 // inverse-shift-of-bswap:
2447 // bswap (shl X, Y) --> lshr (bswap X), Y
2448 // bswap (lshr X, Y) --> shl (bswap X), Y
2449 Value *X, *Y;
2450 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2451 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2453 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2454 BinaryOperator::BinaryOps InverseShift =
2455 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2456 ? Instruction::LShr
2457 : Instruction::Shl;
2458 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2459 }
2460 }
2461
2462 KnownBits Known = computeKnownBits(IIOperand, II);
2463 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2464 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2465 unsigned BW = Known.getBitWidth();
2466
2467 // bswap(x) -> shift(x) if x has exactly one "active byte"
2468 if (BW - LZ - TZ == 8) {
2469 assert(LZ != TZ && "active byte cannot be in the middle");
2470 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2471 return BinaryOperator::CreateNUWShl(
2472 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2473 // -> lshr(x) if the "active byte" is in the high part of x
2474 return BinaryOperator::CreateExactLShr(
2475 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2476 }
2477
2478 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2479 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2480 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2481 Value *CV = ConstantInt::get(X->getType(), C);
2482 Value *V = Builder.CreateLShr(X, CV);
2483 return new TruncInst(V, IIOperand->getType());
2484 }
2485
2486 if (Instruction *crossLogicOpFold =
2488 return crossLogicOpFold;
2489 }
2490
2491 // Try to fold into bitreverse if bswap is the root of the expression tree.
2492 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2493 /*MatchBitReversals*/ true))
2494 return BitOp;
2495 break;
2496 }
2497 case Intrinsic::masked_load:
2498 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2499 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2500 break;
2501 case Intrinsic::masked_store:
2502 return simplifyMaskedStore(*II);
2503 case Intrinsic::masked_gather:
2504 return simplifyMaskedGather(*II);
2505 case Intrinsic::masked_scatter:
2506 return simplifyMaskedScatter(*II);
2507 case Intrinsic::launder_invariant_group:
2508 case Intrinsic::strip_invariant_group:
2509 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2510 return replaceInstUsesWith(*II, SkippedBarrier);
2511 break;
2512 case Intrinsic::powi: {
2513 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2514 // 0 and 1 are handled in instsimplify
2515 // powi(x, -1) -> 1/x
2516 if (Power->isMinusOne())
2517 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2518 II->getArgOperand(0), II);
2519 // powi(x, 2) -> x*x
2520 if (Power->equalsInt(2))
2521 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2522 II->getArgOperand(0), II);
2523
2524 if (!Power->getValue()[0]) {
2525 Value *X;
2526 // If power is even:
2527 // powi(-x, p) -> powi(x, p)
2528 // powi(fabs(x), p) -> powi(x, p)
2529 // powi(copysign(x, y), p) -> powi(x, p)
2530 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2531 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2532 match(II->getArgOperand(0),
2534 return CallInst::Create(II->getCalledFunction(), {X, Power});
2535 }
2536 }
2537 if (ConstantFP *Base = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
2538 Value *Exp = II->getArgOperand(1);
2539 Type *Ty = Base->getType();
2540 // powi(2.0, p) -> ldexp(1.0, p)
2541 if (II->hasApproxFunc() && Base->isExactlyValue(2.0)) {
2542 ConstantFP *One = ConstantFP::get(Ty, 1.0);
2543 if (auto *VTy = dyn_cast<VectorType>(Ty))
2544 Exp = Builder.CreateVectorSplat(VTy->getElementCount(), Exp);
2545 Value *Ldexp = Builder.CreateLdexp(One, Exp, II);
2546 return replaceInstUsesWith(*II, Ldexp);
2547 }
2548 }
2549 break;
2550 }
2551
2552 case Intrinsic::cttz:
2553 case Intrinsic::ctlz:
2554 if (auto *I = foldCttzCtlz(*II, *this))
2555 return I;
2556 break;
2557
2558 case Intrinsic::ctpop:
2559 if (auto *I = foldCtpop(*II, *this))
2560 return I;
2561 break;
2562
2563 case Intrinsic::fshl:
2564 case Intrinsic::fshr: {
2565 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2566 Type *Ty = II->getType();
2567 unsigned BitWidth = Ty->getScalarSizeInBits();
2568 Constant *ShAmtC;
2569 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2570 // Canonicalize a shift amount constant operand to modulo the bit-width.
2571 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2572 Constant *ModuloC =
2573 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2574 if (!ModuloC)
2575 return nullptr;
2576 if (ModuloC != ShAmtC)
2577 return CallInst::Create(II->getCalledFunction(), {Op0, Op1, ModuloC});
2578
2580 ShAmtC, DL),
2581 m_One()) &&
2582 "Shift amount expected to be modulo bitwidth");
2583
2584 // Canonicalize funnel shift right by constant to funnel shift left. This
2585 // is not entirely arbitrary. For historical reasons, the backend may
2586 // recognize rotate left patterns but miss rotate right patterns.
2587 if (IID == Intrinsic::fshr) {
2588 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2589 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2590 return nullptr;
2591
2592 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2593 Module *Mod = II->getModule();
2594 Function *Fshl =
2595 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2596 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2597 }
2598 assert(IID == Intrinsic::fshl &&
2599 "All funnel shifts by simple constants should go left");
2600
2601 // fshl(X, 0, C) --> shl X, C
2602 // fshl(X, undef, C) --> shl X, C
2603 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2604 return BinaryOperator::CreateShl(Op0, ShAmtC);
2605
2606 // fshl(0, X, C) --> lshr X, (BW-C)
2607 // fshl(undef, X, C) --> lshr X, (BW-C)
2608 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2609 return BinaryOperator::CreateLShr(Op1,
2610 ConstantExpr::getSub(WidthC, ShAmtC));
2611
2612 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2613 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2614 Module *Mod = II->getModule();
2615 Function *Bswap =
2616 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2617 return CallInst::Create(Bswap, { Op0 });
2618 }
2619 if (Instruction *BitOp =
2620 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2621 /*MatchBitReversals*/ true))
2622 return BitOp;
2623
2624 // R = fshl(X, X, C2)
2625 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
2626 Value *InnerOp;
2627 const APInt *ShAmtInnerC, *ShAmtOuterC;
2628 if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
2629 m_APInt(ShAmtInnerC))) &&
2630 match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
2631 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2632 APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
2633 if (Modulo.isZero())
2634 return replaceInstUsesWith(*II, InnerOp);
2635 Constant *ModuloC = ConstantInt::get(Ty, Modulo);
2637 {InnerOp, InnerOp, ModuloC});
2638 }
2639 }
2640
2641 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2642 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2643 // if BitWidth is a power-of-2
2644 Value *Y;
2645 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2646 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2647 Module *Mod = II->getModule();
2649 Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2650 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2651 }
2652
2653 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2654 // power-of-2
2655 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2656 match(Op1, m_ZeroInt())) {
2657 Value *Op2 = II->getArgOperand(2);
2658 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2659 return BinaryOperator::CreateShl(Op0, And);
2660 }
2661
2662 // Left or right might be masked.
2664 return &CI;
2665
2666 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2667 // so only the low bits of the shift amount are demanded if the bitwidth is
2668 // a power-of-2.
2669 if (!isPowerOf2_32(BitWidth))
2670 break;
2672 KnownBits Op2Known(BitWidth);
2673 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2674 return &CI;
2675 break;
2676 }
2677 case Intrinsic::pdep: {
2678 const APInt *MaskC;
2679 if (match(II->getArgOperand(1), m_APInt(MaskC))) {
2680 unsigned MaskIdx, MaskLen;
2681 if (MaskC->isShiftedMask(MaskIdx, MaskLen)) {
2682 // Any single contiguous sequence of 1s anywhere in the mask simply
2683 // describes a subset of the input bits shifted to the appropriate
2684 // position. Replace with the straightforward IR.
2685 Value *Input = II->getArgOperand(0);
2686 Value *ShiftAmt = ConstantInt::get(II->getType(), MaskIdx);
2687 Value *Shifted = Builder.CreateShl(Input, ShiftAmt);
2688 Value *Masked = Builder.CreateAnd(Shifted, II->getArgOperand(1));
2689 return replaceInstUsesWith(*II, Masked);
2690 }
2691 }
2692 break;
2693 }
2694 case Intrinsic::pext: {
2695 const APInt *MaskC;
2696 if (match(II->getArgOperand(1), m_APInt(MaskC))) {
2697 unsigned MaskIdx, MaskLen;
2698 if (MaskC->isShiftedMask(MaskIdx, MaskLen)) {
2699 // Any single contiguous sequence of 1s anywhere in the mask simply
2700 // describes a subset of the input bits shifted to the appropriate
2701 // position. Replace with the straightforward IR.
2702 Value *Input = II->getArgOperand(0);
2703 Value *Masked = Builder.CreateAnd(Input, II->getArgOperand(1));
2704 Value *ShiftAmt = ConstantInt::get(II->getType(), MaskIdx);
2705 Value *Shifted = Builder.CreateLShr(Masked, ShiftAmt);
2706 return replaceInstUsesWith(*II, Shifted);
2707 }
2708 }
2709 break;
2710 }
2711 case Intrinsic::ptrmask: {
2712 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2713 KnownBits Known(BitWidth);
2715 return II;
2716
2717 Value *InnerPtr, *InnerMask;
2718 bool Changed = false;
2719 // Combine:
2720 // (ptrmask (ptrmask p, A), B)
2721 // -> (ptrmask p, (and A, B))
2722 if (match(II->getArgOperand(0),
2724 m_Value(InnerMask))))) {
2725 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2726 "Mask types must match");
2727 // TODO: If InnerMask == Op1, we could copy attributes from inner
2728 // callsite -> outer callsite.
2729 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2730 replaceOperand(CI, 0, InnerPtr);
2731 replaceOperand(CI, 1, NewMask);
2732 Changed = true;
2733 }
2734
2735 // See if we can deduce non-null.
2736 if (!CI.hasRetAttr(Attribute::NonNull) &&
2737 (Known.isNonZero() ||
2738 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2739 CI.addRetAttr(Attribute::NonNull);
2740 Changed = true;
2741 }
2742
2743 unsigned NewAlignmentLog =
2745 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2746 // Known bits will capture if we had alignment information associated with
2747 // the pointer argument.
2748 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2750 CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2751 Changed = true;
2752 }
2753 if (Changed)
2754 return &CI;
2755 break;
2756 }
2757 case Intrinsic::uadd_with_overflow:
2758 case Intrinsic::sadd_with_overflow: {
2759 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2760 return I;
2761
2762 // Given 2 constant operands whose sum does not overflow:
2763 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2764 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2765 Value *X;
2766 const APInt *C0, *C1;
2767 Value *Arg0 = II->getArgOperand(0);
2768 Value *Arg1 = II->getArgOperand(1);
2769 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2770 bool HasNWAdd = IsSigned
2771 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2772 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2773 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2774 bool Overflow;
2775 APInt NewC =
2776 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2777 if (!Overflow)
2778 return replaceInstUsesWith(
2779 *II, Builder.CreateBinaryIntrinsic(
2780 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2781 }
2782 break;
2783 }
2784
2785 case Intrinsic::umul_with_overflow:
2786 case Intrinsic::smul_with_overflow:
2787 case Intrinsic::usub_with_overflow:
2788 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2789 return I;
2790 break;
2791
2792 case Intrinsic::ssub_with_overflow: {
2793 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2794 return I;
2795
2796 Constant *C;
2797 Value *Arg0 = II->getArgOperand(0);
2798 Value *Arg1 = II->getArgOperand(1);
2799 // Given a constant C that is not the minimum signed value
2800 // for an integer of a given bit width:
2801 //
2802 // ssubo X, C -> saddo X, -C
2803 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2804 Value *NegVal = ConstantExpr::getNeg(C);
2805 // Build a saddo call that is equivalent to the discovered
2806 // ssubo call.
2807 return replaceInstUsesWith(
2808 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2809 Arg0, NegVal));
2810 }
2811
2812 break;
2813 }
2814
2815 case Intrinsic::uadd_sat:
2816 case Intrinsic::sadd_sat:
2817 case Intrinsic::usub_sat:
2818 case Intrinsic::ssub_sat: {
2820 Type *Ty = SI->getType();
2821 Value *Arg0 = SI->getLHS();
2822 Value *Arg1 = SI->getRHS();
2823
2824 // Make use of known overflow information.
2825 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2826 Arg0, Arg1, SI);
2827 switch (OR) {
2829 break;
2831 if (SI->isSigned())
2832 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2833 else
2834 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2836 unsigned BitWidth = Ty->getScalarSizeInBits();
2837 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2838 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2839 }
2841 unsigned BitWidth = Ty->getScalarSizeInBits();
2842 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2843 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2844 }
2845 }
2846
2847 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2848 // which after that:
2849 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2850 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2851 Constant *C, *C1;
2852 Value *A;
2853 if (IID == Intrinsic::usub_sat &&
2854 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2855 match(Arg1, m_ImmConstant(C1))) {
2856 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2857 auto *NewSub =
2858 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2859 return replaceInstUsesWith(*SI, NewSub);
2860 }
2861
2862 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2863 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2864 C->isNotMinSignedValue()) {
2865 Value *NegVal = ConstantExpr::getNeg(C);
2866 return replaceInstUsesWith(
2867 *II, Builder.CreateBinaryIntrinsic(
2868 Intrinsic::sadd_sat, Arg0, NegVal));
2869 }
2870
2871 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2872 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2873 // if Val and Val2 have the same sign
2874 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2875 Value *X;
2876 const APInt *Val, *Val2;
2877 APInt NewVal;
2878 bool IsUnsigned =
2879 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2880 if (Other->getIntrinsicID() == IID &&
2881 match(Arg1, m_APInt(Val)) &&
2882 match(Other->getArgOperand(0), m_Value(X)) &&
2883 match(Other->getArgOperand(1), m_APInt(Val2))) {
2884 if (IsUnsigned)
2885 NewVal = Val->uadd_sat(*Val2);
2886 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2887 bool Overflow;
2888 NewVal = Val->sadd_ov(*Val2, Overflow);
2889 if (Overflow) {
2890 // Both adds together may add more than SignedMaxValue
2891 // without saturating the final result.
2892 break;
2893 }
2894 } else {
2895 // Cannot fold saturated addition with different signs.
2896 break;
2897 }
2898
2899 return replaceInstUsesWith(
2900 *II, Builder.CreateBinaryIntrinsic(
2901 IID, X, ConstantInt::get(II->getType(), NewVal)));
2902 }
2903 }
2904 break;
2905 }
2906
2907 case Intrinsic::minnum:
2908 case Intrinsic::maxnum:
2909 case Intrinsic::minimumnum:
2910 case Intrinsic::maximumnum:
2911 case Intrinsic::minimum:
2912 case Intrinsic::maximum: {
2913 Value *Arg0 = II->getArgOperand(0);
2914 Value *Arg1 = II->getArgOperand(1);
2915 Value *X, *Y;
2916 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2917 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2918 // If both operands are negated, invert the call and negate the result:
2919 // min(-X, -Y) --> -(max(X, Y))
2920 // max(-X, -Y) --> -(min(X, Y))
2921 Intrinsic::ID NewIID;
2922 switch (IID) {
2923 case Intrinsic::maxnum:
2924 NewIID = Intrinsic::minnum;
2925 break;
2926 case Intrinsic::minnum:
2927 NewIID = Intrinsic::maxnum;
2928 break;
2929 case Intrinsic::maximumnum:
2930 NewIID = Intrinsic::minimumnum;
2931 break;
2932 case Intrinsic::minimumnum:
2933 NewIID = Intrinsic::maximumnum;
2934 break;
2935 case Intrinsic::maximum:
2936 NewIID = Intrinsic::minimum;
2937 break;
2938 case Intrinsic::minimum:
2939 NewIID = Intrinsic::maximum;
2940 break;
2941 default:
2942 llvm_unreachable("unexpected intrinsic ID");
2943 }
2944 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2945 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2946 FNeg->copyIRFlags(II);
2947 return FNeg;
2948 }
2949
2950 // m(m(X, C2), C1) -> m(X, C)
2951 const APFloat *C1, *C2;
2952 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2953 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2954 ((match(M->getArgOperand(0), m_Value(X)) &&
2955 match(M->getArgOperand(1), m_APFloat(C2))) ||
2956 (match(M->getArgOperand(1), m_Value(X)) &&
2957 match(M->getArgOperand(0), m_APFloat(C2))))) {
2958 APFloat Res(0.0);
2959 switch (IID) {
2960 case Intrinsic::maxnum:
2961 Res = maxnum(*C1, *C2);
2962 break;
2963 case Intrinsic::minnum:
2964 Res = minnum(*C1, *C2);
2965 break;
2966 case Intrinsic::maximumnum:
2967 Res = maximumnum(*C1, *C2);
2968 break;
2969 case Intrinsic::minimumnum:
2970 Res = minimumnum(*C1, *C2);
2971 break;
2972 case Intrinsic::maximum:
2973 Res = maximum(*C1, *C2);
2974 break;
2975 case Intrinsic::minimum:
2976 Res = minimum(*C1, *C2);
2977 break;
2978 default:
2979 llvm_unreachable("unexpected intrinsic ID");
2980 }
2981 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2982 // was a simplification (so Arg0 and its original flags could
2983 // propagate?)
2984 Value *V = Builder.CreateBinaryIntrinsic(
2985 IID, X, ConstantFP::get(Arg0->getType(), Res),
2987 return replaceInstUsesWith(*II, V);
2988 }
2989 }
2990
2991 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2992 if (match(Arg0, m_FPExt(m_Value(X))) && match(Arg1, m_FPExt(m_Value(Y))) &&
2993 (Arg0->hasOneUse() || Arg1->hasOneUse()) &&
2994 X->getType() == Y->getType()) {
2995 Value *NewCall =
2996 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2997 return new FPExtInst(NewCall, II->getType());
2998 }
2999
3000 // m(fpext X, C) -> fpext m(X, TruncC) if C can be losslessly truncated.
3001 Constant *C;
3002 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
3003 match(Arg1, m_ImmConstant(C))) {
3004 if (Constant *TruncC =
3005 getLosslessInvCast(C, X->getType(), Instruction::FPExt, DL)) {
3006 Value *NewCall =
3007 Builder.CreateBinaryIntrinsic(IID, X, TruncC, II, II->getName());
3008 return new FPExtInst(NewCall, II->getType());
3009 }
3010 }
3011
3012 // max X, -X --> fabs X
3013 // min X, -X --> -(fabs X)
3014 // TODO: Remove one-use limitation? That is obviously better for max,
3015 // hence why we don't check for one-use for that. However,
3016 // it would be an extra instruction for min (fnabs), but
3017 // that is still likely better for analysis and codegen.
3018 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
3019 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
3020 return Op0->hasOneUse() ||
3021 (IID != Intrinsic::minimum && IID != Intrinsic::minnum &&
3022 IID != Intrinsic::minimumnum);
3023 return false;
3024 };
3025
3026 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
3027 Value *R = Builder.CreateFAbs(X, II);
3028 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
3029 IID == Intrinsic::minimumnum)
3030 R = Builder.CreateFNegFMF(R, II);
3031 return replaceInstUsesWith(*II, R);
3032 }
3033
3034 break;
3035 }
3036 case Intrinsic::matrix_multiply: {
3037 // Optimize negation in matrix multiplication.
3038
3039 // -A * -B -> A * B
3040 Value *A, *B;
3041 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
3042 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
3043 replaceOperand(*II, 0, A);
3044 replaceOperand(*II, 1, B);
3045 return II;
3046 }
3047
3048 Value *Op0 = II->getOperand(0);
3049 Value *Op1 = II->getOperand(1);
3050 Value *OpNotNeg, *NegatedOp;
3051 unsigned NegatedOpArg, OtherOpArg;
3052 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
3053 NegatedOp = Op0;
3054 NegatedOpArg = 0;
3055 OtherOpArg = 1;
3056 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
3057 NegatedOp = Op1;
3058 NegatedOpArg = 1;
3059 OtherOpArg = 0;
3060 } else
3061 // Multiplication doesn't have a negated operand.
3062 break;
3063
3064 // Only optimize if the negated operand has only one use.
3065 if (!NegatedOp->hasOneUse())
3066 break;
3067
3068 Value *OtherOp = II->getOperand(OtherOpArg);
3069 VectorType *RetTy = cast<VectorType>(II->getType());
3070 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
3071 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
3072 ElementCount NegatedCount = NegatedOpTy->getElementCount();
3073 ElementCount OtherCount = OtherOpTy->getElementCount();
3074 ElementCount RetCount = RetTy->getElementCount();
3075 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
3076 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
3077 ElementCount::isKnownLT(OtherCount, RetCount)) {
3078 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
3079 replaceOperand(*II, NegatedOpArg, OpNotNeg);
3080 replaceOperand(*II, OtherOpArg, InverseOtherOp);
3081 return II;
3082 }
3083 // (-A) * B -> -(A * B), if it is cheaper to negate the result
3084 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
3085 SmallVector<Value *, 5> NewArgs(II->args());
3086 NewArgs[NegatedOpArg] = OpNotNeg;
3087 Value *NewMul = Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
3088 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
3089 }
3090 break;
3091 }
3092 case Intrinsic::fmuladd: {
3093 // Try to simplify the underlying FMul.
3094 if (Value *V =
3095 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
3096 II->getFastMathFlags(), SQ.getWithInstruction(II)))
3097 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
3098 II->getFastMathFlags());
3099
3100 [[fallthrough]];
3101 }
3102 case Intrinsic::fma: {
3103 // fma fneg(x), fneg(y), z -> fma x, y, z
3104 Value *Src0 = II->getArgOperand(0);
3105 Value *Src1 = II->getArgOperand(1);
3106 Value *Src2 = II->getArgOperand(2);
3107 Value *X, *Y;
3108 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y))))
3109 return replaceInstUsesWith(
3110 *II, Builder.CreateIntrinsic(IID, II->getType(), {X, Y, Src2}, II));
3111
3112 // fma fabs(x), fabs(x), z -> fma x, x, z
3113 if (match(Src0, m_FAbs(m_Value(X))) && match(Src1, m_FAbs(m_Specific(X))))
3114 return replaceInstUsesWith(
3115 *II, Builder.CreateIntrinsic(IID, II->getType(), {X, X, Src2}, II));
3116
3117 // Try to simplify the underlying FMul. We can only apply simplifications
3118 // that do not require rounding.
3119 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
3120 SQ.getWithInstruction(II)))
3121 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
3122
3123 // fma x, y, 0 -> fmul x, y
3124 // This is always valid for -0.0, but requires nsz for +0.0 as
3125 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
3126 if (match(Src2, m_NegZeroFP()) ||
3127 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
3128 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
3129
3130 // fma x, -1.0, y -> fsub y, x
3131 if (match(Src1, m_SpecificFP(-1.0)))
3132 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
3133
3134 break;
3135 }
3136 case Intrinsic::copysign: {
3137 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
3138 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3139 Sign, getSimplifyQuery().getWithInstruction(II))) {
3140 if (*KnownSignBit) {
3141 // If we know that the sign argument is negative, reduce to FNABS:
3142 // copysign Mag, -Sign --> fneg (fabs Mag)
3143 Value *Fabs = Builder.CreateFAbs(Mag, II);
3144 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
3145 }
3146
3147 // If we know that the sign argument is positive, reduce to FABS:
3148 // copysign Mag, +Sign --> fabs Mag
3149 Value *Fabs = Builder.CreateFAbs(Mag, II);
3150 return replaceInstUsesWith(*II, Fabs);
3151 }
3152
3153 // Propagate sign argument through nested calls:
3154 // copysign Mag, (copysign ?, X) --> copysign Mag, X
3155 Value *X;
3157 Value *CopySign =
3158 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
3159 return replaceInstUsesWith(*II, CopySign);
3160 }
3161
3162 // Clear sign-bit of constant magnitude:
3163 // copysign -MagC, X --> copysign MagC, X
3164 // TODO: Support constant folding for fabs
3165 const APFloat *MagC;
3166 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
3167 APFloat PosMagC = *MagC;
3168 PosMagC.clearSign();
3169 return replaceInstUsesWith(
3170 *II, Builder.CreateCopySign(ConstantFP::get(Mag->getType(), PosMagC),
3171 Sign, II));
3172 }
3173
3174 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3175 // copysign (fabs X), Sign --> copysign X, Sign
3176 // copysign (fneg X), Sign --> copysign X, Sign
3177 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
3178 return replaceInstUsesWith(*II, Builder.CreateCopySign(X, Sign, II));
3179
3180 // copysign(floor(fabs(X)), X) --> copysign(trunc(X), X)
3181 // copysign ignores the sign bit of its magnitude argument (implicit fabs),
3182 // so replacing floor(fabs(X)) with trunc(X) is correct for all inputs
3183 // including NaN without requiring nnan. The m_FAbs match also ensures
3184 // the floor argument is non-negative, so floor == trunc.
3185 Value *FAbsArg;
3186 if (match(Mag, m_Intrinsic<Intrinsic::floor>(m_FAbs(m_Value(FAbsArg)))) &&
3187 FAbsArg == Sign) {
3188 Value *Trunc = Builder.CreateUnaryIntrinsic(Intrinsic::trunc, Sign, II);
3189 return replaceInstUsesWith(*II, Builder.CreateCopySign(Trunc, Sign, II));
3190 }
3191
3192 Type *SignEltTy = Sign->getType()->getScalarType();
3193
3194 Value *CastSrc;
3195 if (match(Sign,
3197 CastSrc->getType()->isIntOrIntVectorTy() &&
3199 KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3201 APInt::getSignMask(Known.getBitWidth()), Known,
3202 SQ))
3203 return II;
3204 }
3205
3206 break;
3207 }
3208 case Intrinsic::fabs: {
3209 Value *Cond, *TVal, *FVal;
3210 Value *Arg = II->getArgOperand(0);
3211 Value *X;
3212 // fabs (-X) --> fabs (X)
3213 if (match(Arg, m_FNeg(m_Value(X)))) {
3214 Value *Fabs = Builder.CreateFAbs(X, II);
3215 return replaceInstUsesWith(CI, Fabs);
3216 }
3217
3218 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
3219 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3220 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
3221 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
3222 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
3223 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
3224 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
3225 SI->setFastMathFlags(II->getFastMathFlags() |
3226 cast<SelectInst>(Arg)->getFastMathFlags());
3227 // Can't copy nsz to select, as even with the nsz flag the fabs result
3228 // always has the sign bit unset.
3229 SI->setHasNoSignedZeros(false);
3230 return SI;
3231 }
3232 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3233 if (match(TVal, m_FNeg(m_Specific(FVal))))
3234 return replaceInstUsesWith(*II, Builder.CreateFAbs(FVal, II));
3235 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3236 if (match(FVal, m_FNeg(m_Specific(TVal))))
3237 return replaceInstUsesWith(*II, Builder.CreateFAbs(TVal, II));
3238 }
3239
3240 Value *Magnitude, *Sign;
3241 if (match(II->getArgOperand(0),
3242 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
3243 // fabs (copysign x, y) -> (fabs x)
3244 Value *AbsSign = Builder.CreateFAbs(Magnitude, II);
3245 return replaceInstUsesWith(*II, AbsSign);
3246 }
3247
3248 [[fallthrough]];
3249 }
3250 case Intrinsic::ceil:
3251 case Intrinsic::floor:
3252 case Intrinsic::round:
3253 case Intrinsic::roundeven:
3254 case Intrinsic::nearbyint:
3255 case Intrinsic::rint:
3256 case Intrinsic::trunc: {
3257 Value *ExtSrc;
3258 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
3259 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3260 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
3261 return new FPExtInst(NarrowII, II->getType());
3262 }
3263 break;
3264 }
3265 case Intrinsic::cos:
3266 case Intrinsic::amdgcn_cos:
3267 case Intrinsic::cosh: {
3268 Value *X, *Sign;
3269 Value *Src = II->getArgOperand(0);
3270 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
3271 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
3272 // f(-x) --> f(x)
3273 // f(fabs(x)) --> f(x)
3274 // f(copysign(x, y)) --> f(x)
3275 // for f in {cos, cosh}
3276 return replaceInstUsesWith(*II, Builder.CreateUnaryIntrinsic(IID, X, II));
3277 }
3278 break;
3279 }
3280 case Intrinsic::sin:
3281 case Intrinsic::amdgcn_sin:
3282 case Intrinsic::sinh:
3283 case Intrinsic::tan:
3284 case Intrinsic::tanh: {
3285 Value *X;
3286 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
3287 // f(-x) --> -f(x)
3288 // for f in {sin, sinh, tan, tanh}
3289 Value *NewFunc = Builder.CreateUnaryIntrinsic(IID, X, II);
3290 return UnaryOperator::CreateFNegFMF(NewFunc, II);
3291 }
3292 break;
3293 }
3294 case Intrinsic::ldexp: {
3295 Value *Src = II->getArgOperand(0);
3296 Value *Exp = II->getArgOperand(1);
3297
3298 // ldexp(x, K) -> fmul x, 2^K
3299 uint64_t ConstExp;
3300 if (match(Exp, m_ConstantInt(ConstExp))) {
3301 const fltSemantics &FPTy =
3302 Src->getType()->getScalarType()->getFltSemantics();
3303
3304 APFloat Scaled = scalbn(APFloat::getOne(FPTy), static_cast<int>(ConstExp),
3306 if (!Scaled.isZero() && !Scaled.isInfinity()) {
3307 // Skip overflow and underflow cases.
3308 Constant *FPConst = ConstantFP::get(Src->getType(), Scaled);
3309 return BinaryOperator::CreateFMulFMF(Src, FPConst, II);
3310 }
3311 }
3312
3313 // ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b))
3314 //
3315 // A danger is if the first ldexp would overflow to infinity or underflow to
3316 // zero, but the combined exponent avoids it.
3317 //
3318 // We ignore this with reassoc, or if we know both exponents have the same
3319 // sign (since then we'd just double down on the over/underflow which would
3320 // occur anyway).
3321 //
3322 // ldexp can take arbitrary integer types, so we also need to ensure that
3323 // our exponent type is wide enough so that if sadd.sat(a, b) saturates,
3324 // then ldexp at the saturated exponent saturates to inf or zero as well.
3325 //
3326 // TODO: Could do better if we had range tracking for the input value
3327 // exponent. Also could broaden sign check to cover == 0 case.
3328 Value *InnerSrc;
3329 Value *InnerExp;
3331 m_Value(InnerSrc), m_Value(InnerExp)))) &&
3332 Exp->getType() == InnerExp->getType()) {
3333 FastMathFlags FMF = II->getFastMathFlags();
3334 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
3335
3336 if (ldexpSaturatingAddIsSafe(II->getType(), Exp->getType()) &&
3337 ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3338 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II)))) {
3339 Value *NewExp =
3340 Builder.CreateBinaryIntrinsic(Intrinsic::sadd_sat, InnerExp, Exp);
3341 return replaceInstUsesWith(
3342 *II, Builder.CreateLdexp(InnerSrc, NewExp, FMF | InnerFlags));
3343 }
3344 }
3345
3346 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3347 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3348 Value *ExtSrc;
3349 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3350 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3351 Value *Select =
3352 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3353 ConstantFP::get(II->getType(), 1.0));
3355 }
3356 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3357 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3358 Value *Select =
3359 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3360 ConstantFP::get(II->getType(), 1.0));
3362 }
3363
3364 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3365 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3366 //
3367 // TODO: If we cared, should insert a canonicalize for x
3368 Value *SelectCond, *SelectLHS, *SelectRHS;
3369 if (match(II->getArgOperand(1),
3370 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3371 m_Value(SelectRHS))))) {
3372 Value *NewLdexp = nullptr;
3373 Value *Select = nullptr;
3374 if (match(SelectRHS, m_ZeroInt())) {
3375 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3376 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3377 } else if (match(SelectLHS, m_ZeroInt())) {
3378 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3379 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3380 }
3381
3382 if (NewLdexp) {
3383 Select->takeName(II);
3384 return replaceInstUsesWith(*II, Select);
3385 }
3386 }
3387
3388 break;
3389 }
3390 case Intrinsic::ptrauth_auth:
3391 case Intrinsic::ptrauth_resign: {
3392 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3393 // sign+auth component if the key and discriminator match.
3394 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3395 Value *Ptr = II->getArgOperand(0);
3396 Value *Key = II->getArgOperand(1);
3397 Value *Disc = II->getArgOperand(2);
3398 Value *DS = nullptr;
3399 if (auto Bundle = II->getOperandBundle(LLVMContext::OB_deactivation_symbol))
3400 DS = Bundle->Inputs[0];
3401
3402 // AuthKey will be the key we need to end up authenticating against in
3403 // whatever we replace this sequence with.
3404 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3405 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3406 Value *OtherDS = nullptr;
3407 if (auto Bundle =
3409 OtherDS = Bundle->Inputs[0];
3410 if (DS != OtherDS)
3411 break;
3412
3413 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3414 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3415 break;
3416 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3417 // The resign intrinsic does not support deactivation symbols.
3418 assert(!DS);
3419 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3420 break;
3421 AuthKey = CI->getArgOperand(1);
3422 AuthDisc = CI->getArgOperand(2);
3423 } else
3424 break;
3425 BasePtr = CI->getArgOperand(0);
3426 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3427 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3428 // our purposes, so check for that too.
3429 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3430 if (!CPA || DS || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3431 break;
3432
3433 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3434 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3435 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3436 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3437 auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
3438 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3439 SignDisc, /*AddrDisc=*/Null,
3440 /*DeactivationSymbol=*/Null);
3442 *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3443 return eraseInstFromFunction(*II);
3444 }
3445
3446 // auth(ptrauth(p,k,d),k,d) -> p
3447 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3448 } else
3449 break;
3450
3451 unsigned NewIntrin;
3452 if (AuthKey && NeedSign) {
3453 // resign(0,1) + resign(1,2) = resign(0, 2)
3454 NewIntrin = Intrinsic::ptrauth_resign;
3455 } else if (AuthKey) {
3456 // resign(0,1) + auth(1) = auth(0)
3457 NewIntrin = Intrinsic::ptrauth_auth;
3458 } else if (NeedSign) {
3459 // sign(0) + resign(0, 1) = sign(1)
3460 NewIntrin = Intrinsic::ptrauth_sign;
3461 } else {
3462 // sign(0) + auth(0) = nop
3463 replaceInstUsesWith(*II, BasePtr);
3464 return eraseInstFromFunction(*II);
3465 }
3466
3467 SmallVector<Value *, 4> CallArgs;
3468 CallArgs.push_back(BasePtr);
3469 if (AuthKey) {
3470 CallArgs.push_back(AuthKey);
3471 CallArgs.push_back(AuthDisc);
3472 }
3473
3474 if (NeedSign) {
3475 CallArgs.push_back(II->getArgOperand(3));
3476 CallArgs.push_back(II->getArgOperand(4));
3477 }
3478
3479 std::vector<OperandBundleDef> Bundles;
3480 if (DS)
3481 Bundles.push_back(OperandBundleDef("deactivation-symbol", DS));
3482
3483 Function *NewFn =
3484 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3485 return CallInst::Create(NewFn, CallArgs, Bundles);
3486 }
3487 case Intrinsic::arm_neon_vtbl1:
3488 case Intrinsic::arm_neon_vtbl2:
3489 case Intrinsic::arm_neon_vtbl3:
3490 case Intrinsic::arm_neon_vtbl4:
3491 case Intrinsic::aarch64_neon_tbl1:
3492 case Intrinsic::aarch64_neon_tbl2:
3493 case Intrinsic::aarch64_neon_tbl3:
3494 case Intrinsic::aarch64_neon_tbl4:
3495 return simplifyNeonTbl(*II, *this, /*IsExtension=*/false);
3496 case Intrinsic::arm_neon_vtbx1:
3497 case Intrinsic::arm_neon_vtbx2:
3498 case Intrinsic::arm_neon_vtbx3:
3499 case Intrinsic::arm_neon_vtbx4:
3500 case Intrinsic::aarch64_neon_tbx1:
3501 case Intrinsic::aarch64_neon_tbx2:
3502 case Intrinsic::aarch64_neon_tbx3:
3503 case Intrinsic::aarch64_neon_tbx4:
3504 return simplifyNeonTbl(*II, *this, /*IsExtension=*/true);
3505
3506 case Intrinsic::arm_neon_vmulls:
3507 case Intrinsic::arm_neon_vmullu:
3508 case Intrinsic::aarch64_neon_smull:
3509 case Intrinsic::aarch64_neon_umull: {
3510 Value *Arg0 = II->getArgOperand(0);
3511 Value *Arg1 = II->getArgOperand(1);
3512
3513 // Handle mul by zero first:
3515 return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3516 }
3517
3518 // Check for constant LHS & RHS - in this case we just simplify.
3519 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3520 IID == Intrinsic::aarch64_neon_umull);
3521 VectorType *NewVT = cast<VectorType>(II->getType());
3522 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3523 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3524 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3525 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3526 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3527 }
3528
3529 // Couldn't simplify - canonicalize constant to the RHS.
3530 std::swap(Arg0, Arg1);
3531 }
3532
3533 // Handle mul by one:
3534 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3535 if (ConstantInt *Splat =
3536 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3537 if (Splat->isOne())
3538 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3539 /*isSigned=*/!Zext);
3540
3541 break;
3542 }
3543 case Intrinsic::arm_neon_aesd:
3544 case Intrinsic::arm_neon_aese:
3545 case Intrinsic::aarch64_crypto_aesd:
3546 case Intrinsic::aarch64_crypto_aese:
3547 case Intrinsic::aarch64_sve_aesd:
3548 case Intrinsic::aarch64_sve_aese: {
3549 Value *DataArg = II->getArgOperand(0);
3550 Value *KeyArg = II->getArgOperand(1);
3551
3552 // Accept zero on either operand.
3553 if (!match(KeyArg, m_ZeroInt()))
3554 std::swap(KeyArg, DataArg);
3555
3556 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3557 Value *Data, *Key;
3558 if (match(KeyArg, m_ZeroInt()) &&
3559 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3560 replaceOperand(*II, 0, Data);
3561 replaceOperand(*II, 1, Key);
3562 return II;
3563 }
3564 break;
3565 }
3566 case Intrinsic::arm_neon_vshifts:
3567 case Intrinsic::arm_neon_vshiftu:
3568 case Intrinsic::aarch64_neon_sshl:
3569 case Intrinsic::aarch64_neon_ushl:
3570 return foldNeonShift(II, *this);
3571 case Intrinsic::hexagon_V6_vandvrt:
3572 case Intrinsic::hexagon_V6_vandvrt_128B: {
3573 // Simplify Q -> V -> Q conversion.
3574 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3575 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3576 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3577 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3578 break;
3579 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3580 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3581 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3582 // Check if every byte has common bits in Bytes and Mask.
3583 uint64_t C = Bytes1 & Mask1;
3584 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3585 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3586 }
3587 break;
3588 }
3589 case Intrinsic::stackrestore: {
3590 enum class ClassifyResult {
3591 None,
3592 Alloca,
3593 StackRestore,
3594 CallWithSideEffects,
3595 };
3596 auto Classify = [](const Instruction *I) {
3597 if (isa<AllocaInst>(I))
3598 return ClassifyResult::Alloca;
3599
3600 if (auto *CI = dyn_cast<CallInst>(I)) {
3601 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
3602 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3603 return ClassifyResult::StackRestore;
3604
3605 if (II->mayHaveSideEffects())
3606 return ClassifyResult::CallWithSideEffects;
3607 } else {
3608 // Consider all non-intrinsic calls to be side effects
3609 return ClassifyResult::CallWithSideEffects;
3610 }
3611 }
3612
3613 return ClassifyResult::None;
3614 };
3615
3616 // If the stacksave and the stackrestore are in the same BB, and there is
3617 // no intervening call, alloca, or stackrestore of a different stacksave,
3618 // remove the restore. This can happen when variable allocas are DCE'd.
3619 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3620 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3621 SS->getParent() == II->getParent()) {
3622 BasicBlock::iterator BI(SS);
3623 bool CannotRemove = false;
3624 for (++BI; &*BI != II; ++BI) {
3625 switch (Classify(&*BI)) {
3626 case ClassifyResult::None:
3627 // So far so good, look at next instructions.
3628 break;
3629
3630 case ClassifyResult::StackRestore:
3631 // If we found an intervening stackrestore for a different
3632 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3633 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3634 CannotRemove = true;
3635 break;
3636
3637 case ClassifyResult::Alloca:
3638 case ClassifyResult::CallWithSideEffects:
3639 // If we found an alloca, a non-intrinsic call, or an intrinsic
3640 // call with side effects, we can't remove the stackrestore.
3641 CannotRemove = true;
3642 break;
3643 }
3644 if (CannotRemove)
3645 break;
3646 }
3647
3648 if (!CannotRemove)
3649 return eraseInstFromFunction(CI);
3650 }
3651 }
3652
3653 // Scan down this block to see if there is another stack restore in the
3654 // same block without an intervening call/alloca.
3656 Instruction *TI = II->getParent()->getTerminator();
3657 bool CannotRemove = false;
3658 for (++BI; &*BI != TI; ++BI) {
3659 switch (Classify(&*BI)) {
3660 case ClassifyResult::None:
3661 // So far so good, look at next instructions.
3662 break;
3663
3664 case ClassifyResult::StackRestore:
3665 // If there is a stackrestore below this one, remove this one.
3666 return eraseInstFromFunction(CI);
3667
3668 case ClassifyResult::Alloca:
3669 case ClassifyResult::CallWithSideEffects:
3670 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3671 // with side effects (such as llvm.stacksave and llvm.read_register),
3672 // we can't remove the stack restore.
3673 CannotRemove = true;
3674 break;
3675 }
3676 if (CannotRemove)
3677 break;
3678 }
3679
3680 // If the stack restore is in a return, resume, or unwind block and if there
3681 // are no allocas or calls between the restore and the return, nuke the
3682 // restore.
3683 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3684 return eraseInstFromFunction(CI);
3685 break;
3686 }
3687 case Intrinsic::lifetime_end:
3688 // Asan needs to poison memory to detect invalid access which is possible
3689 // even for empty lifetime range.
3690 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3691 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3692 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress) ||
3693 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag))
3694 break;
3695
3696 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3697 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3698 }))
3699 return nullptr;
3700 break;
3701 case Intrinsic::assume: {
3702 for (auto [Idx, OBU] : llvm::enumerate(II->operand_bundles())) {
3703 auto RemoveBundle = [&, Idx = Idx]() -> Instruction * {
3704 if (II->getNumOperandBundles() == 1)
3705 return eraseInstFromFunction(*II);
3707 };
3708
3709 switch (getBundleAttrFromOBU(OBU)) {
3710 case BundleAttr::None:
3711 llvm_unreachable("Unexpected Attribute");
3712 case BundleAttr::Align: {
3713 // Try to remove redundant alignment assumptions.
3714 auto [Ptr, _, OffsetPtr, Alignment, Offset] = getAssumeAlignInfo(OBU);
3715
3716 if (!Alignment)
3717 break;
3718
3719 // Remove align 1 and non-power-of-two bundles; they don't add any
3720 // useful information.
3721 if (*Alignment == 1 || !isPowerOf2_64(*Alignment))
3722 return RemoveBundle();
3723
3724 if (auto *GEP = dyn_cast<GEPOperator>(Ptr);
3725 GEP &&
3726 GEP->getMaxPreservedAlignment(getDataLayout()) >= *Alignment) {
3727 Builder.CreateAlignmentAssumption(
3728 getDataLayout(), GEP->getPointerOperand(), *Alignment,
3729 OffsetPtr ? const_cast<Value *>(OffsetPtr->get()) : nullptr);
3730 return RemoveBundle();
3731 }
3732
3733 if (!Offset)
3734 break;
3735
3736 Value *BasePtr;
3737 const APInt *PtrOffset;
3738 if (match(Ptr.get(), m_PtrAdd(m_Value(BasePtr), m_APInt(PtrOffset)))) {
3739 auto PtrOffsetVal =
3740 PtrOffset->sextOrTrunc(DL.getIndexTypeSizeInBits(Ptr->getType()))
3741 .trySExtValue();
3742 if (!PtrOffsetVal)
3743 break;
3744 Builder.CreateAlignmentAssumption(
3745 DL, BasePtr, *Alignment,
3746 Builder.getInt64(*Offset - *PtrOffsetVal));
3747 return RemoveBundle();
3748 }
3749
3750 // Don't try to remove align assumptions for pointers derived from
3751 // arguments. We might lose information if the function gets inlined and
3752 // the align argument attribute disappears.
3753 Value *UO = getUnderlyingObject(Ptr);
3754 if (!UO || isa<Argument>(UO))
3755 break;
3756
3757 // Compute known bits for the pointer and drop the assume if the
3758 // known alignment isn't increased by it.
3759 auto AlignMask = (*Alignment - 1);
3760 if (KnownBits KB = computeKnownBits(Ptr, II);
3761 (KB.Zero & AlignMask) == (~*Offset & AlignMask) &&
3762 (KB.One & AlignMask) == (*Offset & AlignMask))
3763 return RemoveBundle();
3764 break;
3765 }
3766
3767 case BundleAttr::Dereferenceable: {
3768 auto [Ptr, _, Count] = getAssumeDereferenceableInfo(OBU);
3769
3770 if (!Count)
3771 break;
3772
3773 if (*Count == 0 ||
3775 getSimplifyQuery().getWithInstruction(II)))
3776 return RemoveBundle();
3777
3778 break;
3779 }
3780
3781 case BundleAttr::Ignore:
3782 return RemoveBundle();
3783
3784 case BundleAttr::NonNull: {
3785 auto [Ptr] = llvm::getAssumeNonNullInfo(OBU);
3786
3787 // Drop assume if we can prove nonnull without it
3788 if (isKnownNonZero(Ptr, getSimplifyQuery().getWithInstruction(II)))
3789 return RemoveBundle();
3790
3791 // Fold the assume into metadata if it's valid at the load
3792 if (auto *LI = dyn_cast<LoadInst>(Ptr);
3793 LI &&
3794 isValidAssumeForContext(II, LI, &DT, /*AllowEphemerals=*/true)) {
3795 MDNode *MD = MDNode::get(II->getContext(), {});
3796 LI->setMetadata(LLVMContext::MD_nonnull, MD);
3797 LI->setMetadata(LLVMContext::MD_noundef, MD);
3798 return RemoveBundle();
3799 }
3800
3801 if (auto *GEP = dyn_cast<GEPOperator>(Ptr);
3802 GEP && GEP->isInBounds() &&
3803 !NullPointerIsDefined(II->getFunction(),
3804 Ptr->getType()->getPointerAddressSpace())) {
3805 Builder.CreateNonnullAssumption(GEP->stripInBoundsOffsets());
3806 return RemoveBundle();
3807 }
3808
3809 // TODO: apply nonnull return attributes to calls and invokes
3810 break;
3811 }
3812
3813 case BundleAttr::NoUndef: {
3814 auto [Val] = getAssumeNoUndefInfo(OBU);
3815
3817 return RemoveBundle();
3818
3819 if (auto *LI = dyn_cast<LoadInst>(Val);
3820 LI &&
3821 isValidAssumeForContext(II, LI, &DT, /*AllowEphemerals=*/true)) {
3822 LI->setMetadata(LLVMContext::MD_noundef,
3823 MDNode::get(II->getContext(), {}));
3824 return RemoveBundle();
3825 }
3826
3827 } break;
3828
3829 case BundleAttr::SeparateStorage: {
3830 auto [Ptr1, Ptr2] = getAssumeSeparateStorageInfo(OBU);
3831 // Separate storage assumptions apply to the underlying allocations, not
3832 // any particular pointer within them. When evaluating the hints for AA
3833 // purposes we getUnderlyingObject them; by precomputing the answers
3834 // here we can avoid having to do so repeatedly there.
3835 auto MaybeSimplifyHint = [&](const Use &U) {
3836 Value *Hint = U.get();
3837 // Not having a limit is safe because InstCombine removes unreachable
3838 // code.
3839 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3840 if (Hint != UnderlyingObject)
3841 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3842 };
3843 MaybeSimplifyHint(Ptr1);
3844 MaybeSimplifyHint(Ptr2);
3845 } break;
3846
3847 // TODO: Drop these assumes when they are redundant
3848 case BundleAttr::DereferenceableOrNull:
3849 break;
3850
3851 // This cannot be simplified
3852 case BundleAttr::Cold:
3853 break;
3854 }
3855 }
3856
3857 // If the assume has operand bundles, the folds below will never work, so
3858 // don't bother trying.
3859 if (II->hasOperandBundles())
3860 break;
3861
3862 Value *IIOperand = II->getArgOperand(0);
3863
3864 // Canonicalize assume(a && b) -> assume(a); assume(b);
3865 // Note: New assumption intrinsics created here are registered by
3866 // the InstCombineIRInserter object.
3867 Value *A, *B;
3868 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3869 Builder.CreateAssumption(A);
3870 Builder.CreateAssumption(B);
3871 return eraseInstFromFunction(*II);
3872 }
3873 // assume(!(a || b)) -> assume(!a); assume(!b);
3874 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3875 Builder.CreateAssumption(Builder.CreateNot(A));
3876 Builder.CreateAssumption(Builder.CreateNot(B));
3877 return eraseInstFromFunction(*II);
3878 }
3879
3880 // Convert nonnull assume like:
3881 // %A = icmp ne i32* %PTR, null
3882 // call void @llvm.assume(i1 %A)
3883 // into
3884 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3885 if (match(IIOperand,
3887 A->getType()->isPointerTy()) {
3888 Builder.CreateNonnullAssumption(A);
3889 return eraseInstFromFunction(*II);
3890 }
3891
3892 // Convert alignment assume like:
3893 // %B = ptrtoint ptr %A to i64
3894 // %C = and i64 %B, Constant
3895 // %D = icmp eq i64 %C, 0
3896 // call void @llvm.assume(i1 %D)
3897 // into
3898 // call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 Constant + 1)]
3899 uint64_t AlignMask = 1;
3900 if ((match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3901 match(IIOperand,
3903 m_And(m_Value(A), m_ConstantInt(AlignMask)),
3904 m_Zero())))) {
3905 if (isPowerOf2_64(AlignMask + 1) &&
3907 Builder.CreateAlignmentAssumption(getDataLayout(), A, AlignMask + 1);
3908 return eraseInstFromFunction(*II);
3909 }
3910 }
3911
3912 // Remove assumes on true/false
3913 if (auto *CI = dyn_cast<ConstantInt>(IIOperand);
3914 CI || isa<UndefValue, PoisonValue>(IIOperand)) {
3915 if (!CI || CI->isZero())
3917 return eraseInstFromFunction(*II);
3918 }
3919
3920 // Update the cache of affected values for this assumption (we might be
3921 // here because we just simplified the condition).
3922 AC.updateAffectedValues(cast<AssumeInst>(II));
3923 break;
3924 }
3925 case Intrinsic::experimental_guard: {
3926 // Is this guard followed by another guard? We scan forward over a small
3927 // fixed window of instructions to handle common cases with conditions
3928 // computed between guards.
3929 Instruction *NextInst = II->getNextNode();
3930 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3931 // Note: Using context-free form to avoid compile time blow up
3932 if (!isSafeToSpeculativelyExecute(NextInst))
3933 break;
3934 NextInst = NextInst->getNextNode();
3935 }
3936 Value *NextCond = nullptr;
3937 if (match(NextInst,
3939 Value *CurrCond = II->getArgOperand(0);
3940
3941 // Remove a guard that is immediately preceded by an identical guard.
3942 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3943 if (CurrCond != NextCond) {
3944 Instruction *MoveI = II->getNextNode();
3945 while (MoveI != NextInst) {
3946 auto *Temp = MoveI;
3947 MoveI = MoveI->getNextNode();
3948 Temp->moveBefore(II->getIterator());
3949 }
3950 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3951 }
3952 eraseInstFromFunction(*NextInst);
3953 return II;
3954 }
3955 break;
3956 }
3957 case Intrinsic::vector_insert: {
3958 Value *Vec = II->getArgOperand(0);
3959 Value *SubVec = II->getArgOperand(1);
3960 Value *Idx = II->getArgOperand(2);
3961 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3962 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3963 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3964
3965 // Only canonicalize if the destination vector, Vec, and SubVec are all
3966 // fixed vectors.
3967 if (DstTy && VecTy && SubVecTy) {
3968 unsigned DstNumElts = DstTy->getNumElements();
3969 unsigned VecNumElts = VecTy->getNumElements();
3970 unsigned SubVecNumElts = SubVecTy->getNumElements();
3971 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3972
3973 // An insert that entirely overwrites Vec with SubVec is a nop.
3974 if (VecNumElts == SubVecNumElts)
3975 return replaceInstUsesWith(CI, SubVec);
3976
3977 // Widen SubVec into a vector of the same width as Vec, since
3978 // shufflevector requires the two input vectors to be the same width.
3979 // Elements beyond the bounds of SubVec within the widened vector are
3980 // undefined.
3981 SmallVector<int, 8> WidenMask;
3982 unsigned i;
3983 for (i = 0; i != SubVecNumElts; ++i)
3984 WidenMask.push_back(i);
3985 for (; i != VecNumElts; ++i)
3986 WidenMask.push_back(PoisonMaskElem);
3987
3988 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3989
3991 for (unsigned i = 0; i != IdxN; ++i)
3992 Mask.push_back(i);
3993 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3994 Mask.push_back(i);
3995 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3996 Mask.push_back(i);
3997
3998 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3999 return replaceInstUsesWith(CI, Shuffle);
4000 }
4001 break;
4002 }
4003 case Intrinsic::vector_extract: {
4004 Value *Vec = II->getArgOperand(0);
4005 Value *Idx = II->getArgOperand(1);
4006
4007 Type *ReturnType = II->getType();
4008 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
4009 // ExtractIdx)
4010 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
4011 Value *InsertTuple, *InsertIdx, *InsertValue;
4013 m_Value(InsertValue),
4014 m_Value(InsertIdx))) &&
4015 InsertValue->getType() == ReturnType) {
4016 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
4017 // Case where we get the same index right after setting it.
4018 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
4019 // InsertValue
4020 if (ExtractIdx == Index)
4021 return replaceInstUsesWith(CI, InsertValue);
4022 // If we are getting a different index than what was set in the
4023 // insert.vector intrinsic, we can just set the input tuple to the one up
4024 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
4025 // InsertIndex), ExtractIndex)
4026 // --> extract.vector(InsertTuple, ExtractIndex)
4027 else
4028 return replaceOperand(CI, 0, InsertTuple);
4029 }
4030
4031 ConstantInt *ALMUpperBound;
4033 m_Value(), m_ConstantInt(ALMUpperBound)))) {
4034 const auto &Attrs = II->getFunction()->getAttributes().getFnAttrs();
4035 unsigned VScaleMin = Attrs.getVScaleRangeMin();
4036 unsigned ScaleFactor =
4037 cast<VectorType>(ReturnType)->isScalableTy() ? VScaleMin : 1;
4038 if (ExtractIdx * ScaleFactor >= ALMUpperBound->getZExtValue())
4039 return replaceInstUsesWith(CI,
4040 ConstantVector::getNullValue(ReturnType));
4041 }
4042
4043 auto *DstTy = dyn_cast<VectorType>(ReturnType);
4044 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
4045
4046 if (DstTy && VecTy) {
4047 auto DstEltCnt = DstTy->getElementCount();
4048 auto VecEltCnt = VecTy->getElementCount();
4049 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
4050
4051 // Extracting the entirety of Vec is a nop.
4052 if (DstEltCnt == VecTy->getElementCount()) {
4053 replaceInstUsesWith(CI, Vec);
4054 return eraseInstFromFunction(CI);
4055 }
4056
4057 // Only canonicalize to shufflevector if the destination vector and
4058 // Vec are fixed vectors.
4059 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
4060 break;
4061
4063 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
4064 Mask.push_back(IdxN + i);
4065
4066 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
4067 return replaceInstUsesWith(CI, Shuffle);
4068 }
4069 break;
4070 }
4071 case Intrinsic::experimental_vp_reverse: {
4072 Value *X;
4073 Value *Vec = II->getArgOperand(0);
4074 Value *Mask = II->getArgOperand(1);
4075 if (!match(Mask, m_AllOnes()))
4076 break;
4077 Value *EVL = II->getArgOperand(2);
4078 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
4079 // rev(unop rev(X)) --> unop X
4080 if (match(Vec,
4082 m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
4083 auto *OldUnOp = cast<UnaryOperator>(Vec);
4085 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
4086 II->getIterator());
4087 return replaceInstUsesWith(CI, NewUnOp);
4088 }
4089 break;
4090 }
4091 case Intrinsic::vector_reduce_or:
4092 case Intrinsic::vector_reduce_and: {
4093 // Canonicalize logical or/and reductions:
4094 // Or reduction for i1 is represented as:
4095 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
4096 // %res = cmp ne iReduxWidth %val, 0
4097 // And reduction for i1 is represented as:
4098 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
4099 // %res = cmp eq iReduxWidth %val, 11111
4100 Value *Arg = II->getArgOperand(0);
4101 Value *Vect;
4102
4103 if (Value *NewOp =
4104 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4105 replaceUse(II->getOperandUse(0), NewOp);
4106 return II;
4107 }
4108
4109 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4110 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4111 if (FTy->getElementType() == Builder.getInt1Ty()) {
4112 Value *Res = Builder.CreateBitCast(
4113 Vect, Builder.getIntNTy(FTy->getNumElements()));
4114 if (IID == Intrinsic::vector_reduce_and) {
4115 Res = Builder.CreateICmpEQ(
4117 } else {
4118 assert(IID == Intrinsic::vector_reduce_or &&
4119 "Expected or reduction.");
4120 Res = Builder.CreateIsNotNull(Res);
4121 }
4122 if (Arg != Vect)
4123 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4124 II->getType());
4125 return replaceInstUsesWith(CI, Res);
4126 }
4127 }
4128 [[fallthrough]];
4129 }
4130 case Intrinsic::vector_reduce_add: {
4131 if (IID == Intrinsic::vector_reduce_add) {
4132 // Convert vector_reduce_add(ZExt(<n x i1>)) to
4133 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4134 // Convert vector_reduce_add(SExt(<n x i1>)) to
4135 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4136 // Convert vector_reduce_add(<n x i1>) to
4137 // Trunc(ctpop(bitcast <n x i1> to in)).
4138 Value *Arg = II->getArgOperand(0);
4139 Value *Vect;
4140
4141 if (Value *NewOp =
4142 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4143 replaceUse(II->getOperandUse(0), NewOp);
4144 return II;
4145 }
4146
4147 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
4148 if (Value *Splat = getSplatValue(Arg)) {
4149 ElementCount VecToReduceCount =
4150 cast<VectorType>(Arg->getType())->getElementCount();
4151 if (VecToReduceCount.isFixed()) {
4152 unsigned VectorSize = VecToReduceCount.getFixedValue();
4153 return BinaryOperator::CreateMul(
4154 Splat,
4155 ConstantInt::get(Splat->getType(), VectorSize, /*IsSigned=*/false,
4156 /*ImplicitTrunc=*/true));
4157 }
4158 }
4159
4160 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4161 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4162 if (FTy->getElementType() == Builder.getInt1Ty()) {
4163 Value *V = Builder.CreateBitCast(
4164 Vect, Builder.getIntNTy(FTy->getNumElements()));
4165 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
4166 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
4167 if (Arg != Vect &&
4168 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
4169 Res = Builder.CreateNeg(Res);
4170 return replaceInstUsesWith(CI, Res);
4171 }
4172 }
4173 }
4174 [[fallthrough]];
4175 }
4176 case Intrinsic::vector_reduce_xor: {
4177 if (IID == Intrinsic::vector_reduce_xor) {
4178 // Exclusive disjunction reduction over the vector with
4179 // (potentially-extended) i1 element type is actually a
4180 // (potentially-extended) arithmetic `add` reduction over the original
4181 // non-extended value:
4182 // vector_reduce_xor(?ext(<n x i1>))
4183 // -->
4184 // ?ext(vector_reduce_add(<n x i1>))
4185 Value *Arg = II->getArgOperand(0);
4186 Value *Vect;
4187
4188 if (Value *NewOp =
4189 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4190 replaceUse(II->getOperandUse(0), NewOp);
4191 return II;
4192 }
4193
4194 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4195 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4196 if (VTy->getElementType() == Builder.getInt1Ty()) {
4197 Value *Res = Builder.CreateAddReduce(Vect);
4198 if (Arg != Vect)
4199 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4200 II->getType());
4201 return replaceInstUsesWith(CI, Res);
4202 }
4203 }
4204 }
4205 [[fallthrough]];
4206 }
4207 case Intrinsic::vector_reduce_mul: {
4208 if (IID == Intrinsic::vector_reduce_mul) {
4209 Value *Arg = II->getArgOperand(0);
4210
4211 if (Value *NewOp =
4212 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4213 replaceUse(II->getOperandUse(0), NewOp);
4214 return II;
4215 }
4216
4217 // vector_reduce_mul(zext(<n x i1>)), or
4218 // vector_reduce_mul(sext(<n x i1>)) (if n is even) -->
4219 // zext(vector_reduce_and(<n x i1>)).
4220 // (The sext case doesn't work if n is odd because multiplying an odd
4221 // number of -1's produces -1, not 1.)
4222 Value *Vect;
4223 bool IsZext = match(Arg, m_ZExt(m_Value(Vect))) &&
4224 Vect->getType()->isIntOrIntVectorTy(1);
4225 bool IsSext =
4226 match(Arg, m_SExt(m_Value(Vect))) &&
4227 Vect->getType()->isIntOrIntVectorTy(1) &&
4228 cast<VectorType>(Vect->getType())->getElementCount().isKnownEven();
4229 if (IsZext || IsSext) {
4230 Value *Res = Builder.CreateAndReduce(Vect);
4231 return CastInst::Create(Instruction::ZExt, Res, II->getType());
4232 }
4233
4234 // vector_reduce_mul(<n x i1>) --> vector_reduce_and(<n x i1>)
4235 if (Arg->getType()->isIntOrIntVectorTy(1))
4236 return replaceInstUsesWith(CI, Builder.CreateAndReduce(Arg));
4237 }
4238 [[fallthrough]];
4239 }
4240 case Intrinsic::vector_reduce_umin:
4241 case Intrinsic::vector_reduce_umax: {
4242 if (IID == Intrinsic::vector_reduce_umin ||
4243 IID == Intrinsic::vector_reduce_umax) {
4244 // UMin/UMax reduction over the vector with (potentially-extended)
4245 // i1 element type is actually a (potentially-extended)
4246 // logical `and`/`or` reduction over the original non-extended value:
4247 // vector_reduce_u{min,max}(?ext(<n x i1>))
4248 // -->
4249 // ?ext(vector_reduce_{and,or}(<n x i1>))
4250 Value *Arg = II->getArgOperand(0);
4251 Value *Vect;
4252
4253 if (Value *NewOp =
4254 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4255 replaceUse(II->getOperandUse(0), NewOp);
4256 return II;
4257 }
4258
4259 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4260 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4261 if (VTy->getElementType() == Builder.getInt1Ty()) {
4262 Value *Res = IID == Intrinsic::vector_reduce_umin
4263 ? Builder.CreateAndReduce(Vect)
4264 : Builder.CreateOrReduce(Vect);
4265 if (Arg != Vect)
4266 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4267 II->getType());
4268 return replaceInstUsesWith(CI, Res);
4269 }
4270 }
4271 }
4272 [[fallthrough]];
4273 }
4274 case Intrinsic::vector_reduce_smin:
4275 case Intrinsic::vector_reduce_smax: {
4276 if (IID == Intrinsic::vector_reduce_smin ||
4277 IID == Intrinsic::vector_reduce_smax) {
4278 // SMin/SMax reduction over the vector with (potentially-extended)
4279 // i1 element type is actually a (potentially-extended)
4280 // logical `and`/`or` reduction over the original non-extended value:
4281 // vector_reduce_s{min,max}(<n x i1>)
4282 // -->
4283 // vector_reduce_{or,and}(<n x i1>)
4284 // and
4285 // vector_reduce_s{min,max}(sext(<n x i1>))
4286 // -->
4287 // sext(vector_reduce_{or,and}(<n x i1>))
4288 // and
4289 // vector_reduce_s{min,max}(zext(<n x i1>))
4290 // -->
4291 // zext(vector_reduce_{and,or}(<n x i1>))
4292 Value *Arg = II->getArgOperand(0);
4293 Value *Vect;
4294
4295 if (Value *NewOp =
4296 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4297 replaceUse(II->getOperandUse(0), NewOp);
4298 return II;
4299 }
4300
4301 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4302 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4303 if (VTy->getElementType() == Builder.getInt1Ty()) {
4304 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4305 if (Arg != Vect)
4306 ExtOpc = cast<CastInst>(Arg)->getOpcode();
4307 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4308 (ExtOpc == Instruction::CastOps::ZExt))
4309 ? Builder.CreateAndReduce(Vect)
4310 : Builder.CreateOrReduce(Vect);
4311 if (Arg != Vect)
4312 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
4313 return replaceInstUsesWith(CI, Res);
4314 }
4315 }
4316 }
4317 [[fallthrough]];
4318 }
4319 case Intrinsic::vector_reduce_fmax:
4320 case Intrinsic::vector_reduce_fmin:
4321 case Intrinsic::vector_reduce_fadd:
4322 case Intrinsic::vector_reduce_fmul: {
4323 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4324 IID != Intrinsic::vector_reduce_fmul) ||
4325 II->hasAllowReassoc();
4326 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4327 IID == Intrinsic::vector_reduce_fmul)
4328 ? 1
4329 : 0;
4330 Value *Arg = II->getArgOperand(ArgIdx);
4331 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4332 replaceUse(II->getOperandUse(ArgIdx), NewOp);
4333 return nullptr;
4334 }
4335 break;
4336 }
4337 case Intrinsic::is_fpclass: {
4338 if (Instruction *I = foldIntrinsicIsFPClass(*II))
4339 return I;
4340 break;
4341 }
4342 case Intrinsic::threadlocal_address: {
4343 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
4344 MaybeAlign Align = II->getRetAlign();
4345 if (MinAlign > Align.valueOrOne()) {
4346 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
4347 return II;
4348 }
4349 break;
4350 }
4351 case Intrinsic::fptoui_sat:
4352 case Intrinsic::fptosi_sat:
4353 if (Instruction *I = foldItoFPtoI(*II))
4354 return I;
4355 break;
4356 case Intrinsic::frexp: {
4357 // frexp(frexp(x).fract) -> { frexp(x).fract, 0 }: the fraction operand is
4358 // already normalized, so the first result is idempotent and the second is
4359 // zero.
4360 if (match(II->getArgOperand(0),
4362 Value *Res = Builder.CreateInsertValue(PoisonValue::get(II->getType()),
4363 II->getArgOperand(0), 0);
4364 Res = Builder.CreateInsertValue(
4365 Res, Constant::getNullValue(II->getType()->getStructElementType(1)),
4366 1);
4367 return replaceInstUsesWith(*II, Res);
4368 }
4369 break;
4370 }
4371 case Intrinsic::get_active_lane_mask: {
4372 const APInt *Op0, *Op1;
4373 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
4374 match(II->getOperand(1), m_APInt(Op1))) {
4375 Type *OpTy = II->getOperand(0)->getType();
4376 return replaceInstUsesWith(
4377 *II, Builder.CreateIntrinsic(
4378 II->getType(), Intrinsic::get_active_lane_mask,
4379 {Constant::getNullValue(OpTy),
4380 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
4381 }
4382 break;
4383 }
4384 case Intrinsic::experimental_get_vector_length: {
4385 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4386 unsigned BitWidth =
4387 std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
4388 II->getType()->getScalarSizeInBits());
4389 ConstantRange Cnt =
4390 computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
4391 SQ.getWithInstruction(II))
4393 ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
4394 ->getValue()
4395 .zextOrTrunc(Cnt.getBitWidth());
4396 if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
4397 MaxLanes = MaxLanes.multiply(
4398 getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
4399
4400 if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
4401 return replaceInstUsesWith(
4402 *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
4403 return nullptr;
4404 }
4405 default: {
4406 // Handle target specific intrinsics
4407 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
4408 if (V)
4409 return *V;
4410 break;
4411 }
4412 }
4413
4414 // Try to fold intrinsic into select/phi operands. This is legal if:
4415 // * The intrinsic is speculatable.
4416 // * The operand is one of the following:
4417 // - a phi.
4418 // - a select with a scalar condition.
4419 // - a select with a vector condition and II is not a cross lane operation.
4421 for (Value *Op : II->args()) {
4422 if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4423 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4424 if (IsVectorCond &&
4425 (!isNotCrossLaneOperation(II) || !II->getType()->isVectorTy()))
4426 continue;
4427 // Don't replace a scalar select with a more expensive vector select if
4428 // we can't simplify both arms of the select.
4429 bool SimplifyBothArms =
4430 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4432 *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4433 return R;
4434 }
4435 if (auto *Phi = dyn_cast<PHINode>(Op))
4436 if (Instruction *R = foldOpIntoPhi(*II, Phi))
4437 return R;
4438 }
4439 }
4440
4442 return Shuf;
4443
4445 return replaceInstUsesWith(*II, Reverse);
4446
4448 return replaceInstUsesWith(*II, Res);
4449
4450 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4451 // context, so it is handled in visitCallBase and we should trigger it.
4452 return visitCallBase(*II);
4453}
4454
4455// Fence instruction simplification
// Erases the fence FI when a neighbouring fence already makes it redundant and
// returns nullptr when FI has to stay.
// NOTE(review): the signature line of this function is absent from this
// listing; FI is the fence being visited.
4457 auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4458 // This check is solely here to handle arbitrary target-dependent syncscopes.
4459 // TODO: Can remove if does not matter in practice.
4460 if (NFI && FI.isIdenticalTo(NFI))
4461 return eraseInstFromFunction(FI);
4462
4463 // Returns true if FI1 is identical or stronger fence than FI2.
// Two fences are only compared when they share a sync scope and that scope is
// System or SingleThread; every other combination conservatively yields false.
4464 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4465 auto FI1SyncScope = FI1->getSyncScopeID();
4466 // Consider same scope, where scope is global or single-thread.
4467 if (FI1SyncScope != FI2->getSyncScopeID() ||
4468 (FI1SyncScope != SyncScope::System &&
4469 FI1SyncScope != SyncScope::SingleThread))
4470 return false;
4471
4472 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4473 };
// The fence that follows is at least as strong, so FI adds nothing.
4474 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4475 return eraseInstFromFunction(FI);
4476
// Same test against the preceding instruction; dyn_cast_or_null tolerates a
// null result from getPrevNode().
4477 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4478 if (isIdenticalOrStrongerFence(PFI, &FI))
4479 return eraseInstFromFunction(FI);
4480 return nullptr;
4481}
4482
4483// InvokeInst simplification
// No invoke-specific handling is done here: the instruction is forwarded
// unchanged to visitCallBase and its result is returned.
// NOTE(review): the signature line is absent from this listing.
4485 return visitCallBase(II);
4486}
4487
4488// CallBrInst simplification
// No callbr-specific handling is done here: the instruction is forwarded
// unchanged to visitCallBase and its result is returned.
// NOTE(review): the signature line is absent from this listing.
4490 return visitCallBase(CBI);
4491}
4492
4493// A simple parser for format string specifiers for the purposes of the
4494// modular-format attribute. In the case of malformed format strings this might
4495// under or over report the specifiers present, but such cases are undefined
4496// behavior.
// The returned bitset has one bit set for every conversion character that was
// found, indexed by the character's value as an unsigned char.
// NOTE(review): the signature line is absent from this listing; FormatStr is
// the only input the visible body reads.
4498 Bitset<256> Specifiers;
4499 for (size_t I = 0; I < FormatStr.size(); ++I) {
4500 if (FormatStr[I] != '%')
4501 continue;
4502
4503 // Check for escaped '%'.
4504 if (I + 1 < FormatStr.size() && FormatStr[I + 1] == '%') {
4505 ++I; // Skip the second '%'.
4506 continue;
4507 }
4508
4509 // Scan past allowed prefix characters.
// The first character after the '%' that is not in this set is recorded as
// the conversion character.
4510 size_t J =
4511 FormatStr.find_first_not_of("0123456789-+ #0$.*'hlLjztqwvI", I + 1);
// Only prefix characters remain up to the end of the string, so there is no
// conversion character left to record.
4512 if (J == StringRef::npos)
4513 break;
4514
4515 Specifiers.set(static_cast<unsigned char>(FormatStr[J]));
4516 I = J; // Resume search from after the specifier.
4517 }
4518 return Specifiers;
4519}
4520
4521static bool isAspectNeeded(StringRef Aspect, CallInst *CI,
4522 std::optional<unsigned> FirstArgIdx,
4523 const std::optional<Bitset<256>> &Specifiers) {
4524 if (Aspect == "float") {
4525 if (Specifiers) {
4526 static constexpr Bitset<256> FloatSpecifiers{'f', 'F', 'e', 'E',
4527 'g', 'G', 'a', 'A'};
4528 return (*Specifiers & FloatSpecifiers).any();
4529 }
4530 // Fallback to type-based check for dynamic format string.
4531 if (!FirstArgIdx)
4532 return true;
4533 return llvm::any_of(
4534 llvm::make_range(std::next(CI->arg_begin(), *FirstArgIdx),
4535 CI->arg_end()),
4536 [](Value *V) { return V->getType()->isFloatingPointTy(); });
4537 }
4538 if (Aspect == "fixed") {
4539 if (Specifiers) {
4540 static constexpr Bitset<256> FixedSpecifiers{'r', 'R', 'k', 'K'};
4541 return (*Specifiers & FixedSpecifiers).any();
4542 }
4543 // Fallback for fixed-point: assume needed if format is dynamic.
4544 return true;
4545 }
4546 // Unknown aspects are always considered to be needed.
4547 return true;
4548}
4549
4550static void referenceAspect(StringRef Aspect, StringRef ImplName, Module *M,
4551 IRBuilderBase &B) {
4552 SmallString<20> Name = ImplName;
4553 Name += '_';
4554 Name += Aspect;
4555 LLVMContext &Ctx = M->getContext();
4556 Function *RelocNoneFn =
4557 Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
4558 B.CreateCall(RelocNoneFn,
4559 {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))});
4560}
4561
// Rewrites a call that carries the "modular-format" function attribute into a
// call to the function named in that attribute, followed by one reference per
// aspect that is still needed. Returns the new call, or nullptr when the
// rewrite does not apply or would not drop any aspect.
// NOTE(review): the signature line and the declarations of Args and AllAspects
// are absent from this listing; CI is the call and B the builder.
4563 if (!CI->hasFnAttr("modular-format"))
4564 return nullptr;
4565
// The attribute value is split on ','; fewer than five fields means there is
// nothing to work with.
4567 llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
4568 if (Args.size() < 5)
4569 return nullptr;
4570
// Fields 1 to 4 are read here; field 0 is not used by the visible code.
4571 StringRef FormatIdxStr = Args[1];
4572 StringRef FirstArgIdxStr = Args[2];
4573 StringRef FnName = Args[3];
4574 StringRef ImplName = Args[4];
4576
4577 unsigned FormatIdx;
4578 std::optional<unsigned> FirstArgIdx;
// Error is only consumed by the asserts below, hence [[maybe_unused]].
// NOTE(review): without assertions a malformed index is not rejected here.
4579 [[maybe_unused]] bool Error;
4580 Error = FormatIdxStr.getAsInteger(10, FormatIdx);
4581 assert(!Error && "invalid format arg index");
4582 --FormatIdx; // 1-based to 0-based
4583
4584 FirstArgIdx.emplace();
4585 Error = FirstArgIdxStr.getAsInteger(10, *FirstArgIdx);
4586 assert(!Error && "invalid first arg index");
// A first-argument index of 0 is turned into an empty optional.
4587 if (*FirstArgIdx > 0)
4588 --*FirstArgIdx; // 1-based to 0-based
4589 else
4590 FirstArgIdx.reset();
4591
4592 if (AllAspects.empty())
4593 return nullptr;
4594
4595 Value *FormatVal = CI->getArgOperand(FormatIdx);
4596 StringRef FormatStr;
4597
// Specifiers stays empty when the format string is not a known constant.
4598 std::optional<Bitset<256>> Specifiers;
4599 if (getConstantStringInfo(FormatVal, FormatStr))
4600 Specifiers = parseFormatStringSpecifiers(FormatStr);
4601
4602 SmallVector<StringRef> NeededAspects;
4603 for (StringRef Aspect : AllAspects)
4604 if (isAspectNeeded(Aspect, CI, FirstArgIdx, Specifiers))
4605 NeededAspects.push_back(Aspect);
4606
// If every aspect is needed, no rewrite is performed.
4607 if (NeededAspects.size() == AllAspects.size())
4608 return nullptr;
4609
// Clone the call and retarget it at FnName, using the callee's function type
// and its attributes minus "modular-format".
4610 Module *M = CI->getModule();
4611 LLVMContext &Ctx = M->getContext();
4612 Function *Callee = CI->getCalledFunction();
4613 FunctionCallee ModularFn = M->getOrInsertFunction(
4614 FnName, Callee->getFunctionType(),
4615 Callee->getAttributes().removeFnAttribute(Ctx, "modular-format"));
4616 CallInst *New = cast<CallInst>(CI->clone());
4617 New->setCalledFunction(ModularFn);
4618 New->removeFnAttr("modular-format");
4619 B.Insert(New);
4620
// References are emitted in sorted order of the aspect names.
4621 llvm::sort(NeededAspects);
4622 for (StringRef Request : NeededAspects)
4623 referenceAspect(Request, ImplName, M, B);
4624
4625 return New;
4626}
4627
// Tries LibCallSimplifier on CI first and the modular-format rewrite second.
// On success NumSimplified is incremented and either CI itself (when it has no
// uses) or the result of replaceInstUsesWith is returned; otherwise nullptr.
4628Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
// Calls without a statically known callee are not handled.
4629 if (!CI->getCalledFunction()) return nullptr;
4630
4631 // Skip optimizing notail and musttail calls so
4632 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4633 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4634 if (CI->isMustTailCall() || CI->isNoTailCall())
4635 return nullptr;
4636
// Callbacks handed to LibCallSimplifier for replacing and erasing
// instructions.
// NOTE(review): the body line of InstCombineErase is absent from this listing.
4637 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4638 replaceInstUsesWith(*From, With);
4639 };
4640 auto InstCombineErase = [this](Instruction *I) {
4642 };
4643 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4644 InstCombineRAUW, InstCombineErase);
4645 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4646 ++NumSimplified;
4647 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4648 }
4649 if (Value *With = optimizeModularFormat(CI, Builder)) {
4650 ++NumSimplified;
4651 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4652 }
4653
4654 return nullptr;
4655}
4656
// Looks for the single init.trampoline call among the users of TrampMem.
// Returns it, or nullptr when TrampMem is not based on an alloca, has any user
// other than init/adjust trampoline intrinsics, or has more than one
// init.trampoline.
// NOTE(review): the signature line and the declaration of II inside the loop
// are absent from this listing.
4658 // Strip off at most one level of pointer casts, looking for an alloca. This
4659 // is good enough in practice and simpler than handling any number of casts.
4660 Value *Underlying = TrampMem->stripPointerCasts();
// If something was stripped, TrampMem must be the only user of the stripped
// value.
4661 if (Underlying != TrampMem &&
4662 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4663 return nullptr;
4664 if (!isa<AllocaInst>(Underlying))
4665 return nullptr;
4666
4667 IntrinsicInst *InitTrampoline = nullptr;
4668 for (User *U : TrampMem->users()) {
// A null II makes the whole search give up.
4670 if (!II)
4671 return nullptr;
4672 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4673 if (InitTrampoline)
4674 // More than one init_trampoline writes to this value. Give up.
4675 return nullptr;
4676 InitTrampoline = II;
4677 continue;
4678 }
4679 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4680 // Allow any number of calls to adjust.trampoline.
4681 continue;
// Any other intrinsic user disqualifies the memory.
4682 return nullptr;
4683 }
4684
4685 // No call to init.trampoline found.
4686 if (!InitTrampoline)
4687 return nullptr;
4688
4689 // Check that the alloca is being used in the expected way.
4690 if (InitTrampoline->getOperand(0) != TrampMem)
4691 return nullptr;
4692
4693 return InitTrampoline;
4694}
4695
// Walks backwards from AdjustTramp within its basic block and returns the
// first init.trampoline whose operand 0 is TrampMem. Gives up with nullptr as
// soon as another instruction that may write to memory is met, or when the
// start of the block is reached.
// NOTE(review): the first line of the signature and the declaration of II
// inside the loop are absent from this listing.
4697 Value *TrampMem) {
4698 // Visit all the previous instructions in the basic block, and try to find a
4699 // init.trampoline which has a direct path to the adjust.trampoline.
4700 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4701 E = AdjustTramp->getParent()->begin();
4702 I != E;) {
// The iterator is decremented before use, so AdjustTramp itself is not
// inspected.
4703 Instruction *Inst = &*--I;
4705 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4706 II->getOperand(0) == TrampMem)
4707 return II;
4708 if (Inst->mayWriteToMemory())
4709 return nullptr;
4710 }
4711 return nullptr;
4712}
4713
4714// Given a call to llvm.adjust.trampoline, find and return the corresponding
4715// call to llvm.init.trampoline if the call to the trampoline can be optimized
4716// to a direct call to a function. Otherwise return NULL.
// NOTE(review): the signature line and the line that starts the first lookup
// (whose result is returned as IT below) are absent from this listing.
4718 Callee = Callee->stripPointerCasts();
// Only a callee that is itself an adjust.trampoline call qualifies.
4719 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4720 if (!AdjustTramp ||
4721 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4722 return nullptr;
4723
4724 Value *TrampMem = AdjustTramp->getOperand(0);
4725
4727 return IT;
// Second strategy: scan backwards in the basic block of the adjust call.
4728 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4729 return IT;
4730 return nullptr;
4731}
4732
// Folds a call whose callee is a no-op inttoptr of a ptrauth.sign or
// ptrauth.resign intrinsic, provided the call's "ptrauth" operand bundle
// matches the intrinsic's operands. Returns the replacement call, or nullptr
// when the pattern does not match.
4733Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4734 const Value *Callee = Call.getCalledOperand();
4735 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4736 if (!IPC || !IPC->isNoopCast(DL))
4737 return nullptr;
4738
4739 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4740 if (!II)
4741 return nullptr;
4742
4743 Intrinsic::ID IIID = II->getIntrinsicID();
4744 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4745 return nullptr;
4746
4747 // Isolate the ptrauth bundle from the others.
// NOTE(review): the declaration of NewBundles is absent from this listing.
4748 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4750 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4751 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4752 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4753 PtrAuthBundleOrNone = Bundle;
4754 else
4755 NewBundles.emplace_back(Bundle);
4756 }
4757
4758 if (!PtrAuthBundleOrNone)
4759 return nullptr;
4760
4761 Value *NewCallee = nullptr;
4762 switch (IIID) {
4763 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4764 // assuming the call bundle and the sign operands match.
4765 case Intrinsic::ptrauth_resign: {
4766 // Resign result key should match bundle.
4767 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4768 return nullptr;
4769 // Resign result discriminator should match bundle.
4770 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4771 return nullptr;
4772
4773 // Resign input (auth) key should also match: we can't change the key on
4774 // the new call we're generating, because we don't know what keys are valid.
4775 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4776 return nullptr;
4777
// The new call gets a "ptrauth" bundle built from operands 1 and 2 of the
// resign intrinsic and calls operand 0 directly.
4778 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4779 NewBundles.emplace_back("ptrauth", NewBundleOps);
4780 NewCallee = II->getOperand(0);
4781 break;
4782 }
4783
4784 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4785 // assuming the call bundle and the sign operands match.
4786 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4787 case Intrinsic::ptrauth_sign: {
4788 // Sign key should match bundle.
4789 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4790 return nullptr;
4791 // Sign discriminator should match bundle.
4792 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4793 return nullptr;
4794 NewCallee = II->getOperand(0);
4795 break;
4796 }
4797 default:
4798 llvm_unreachable("unexpected intrinsic ID");
4799 }
4800
4801 if (!NewCallee)
4802 return nullptr;
4803
// Cast the new callee to the original callee's type and recreate the call with
// the adjusted bundle list.
4804 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4805 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4806 NewCall->setCalledOperand(NewCallee);
4807 return NewCall;
4808}
4809
// Turns a call through a ptrauth constant into a direct call to the underlying
// function when the call's ptrauth bundle is known to be compatible with the
// constant. Returns the new call, or nullptr when the pattern does not match.
// NOTE(review): the lines that define CPA, PAB and NewCall are absent from
// this listing.
4810Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4812 if (!CPA)
4813 return nullptr;
4814
4815 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4816 // If the ptrauth constant isn't based on a function pointer, bail out.
4817 if (!CalleeF)
4818 return nullptr;
4819
4820 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4822 if (!PAB)
4823 return nullptr;
4824
// Bundle input 0 is the key (cast to ConstantInt here), input 1 the
// discriminator.
4825 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4826 Value *Discriminator = PAB->Inputs[1];
4827
4828 // If the bundle doesn't match, this is probably going to fail to auth.
4829 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4830 return nullptr;
4831
4832 // If the bundle matches the constant, proceed in making this a direct call.
4834 NewCall->setCalledOperand(CalleeF);
4835 return NewCall;
4836}
4837
// Annotates a pointer-returning allocation call with size and alignment facts
// obtained through getAllocSize / getAllocAlignment. Returns true when the
// call was changed.
// NOTE(review): the statements that actually add the return attributes are
// absent from this listing; only their trailing argument lines are visible.
4838bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4839 const TargetLibraryInfo *TLI) {
4840 // Note: We only handle cases which can't be driven from generic attributes
4841 // here. So, for example, nonnull and noalias (which are common properties
4842 // of some allocation functions) are expected to be handled via annotation
4843 // of the respective allocator declaration with generic attributes.
4844 bool Changed = false;
4845
4846 if (!Call.getType()->isPointerTy())
4847 return Changed;
4848
// Only a known, non-zero allocation size is used.
4849 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4850 if (Size && *Size != 0) {
4851 // TODO: We really should just emit deref_or_null here and then
4852 // let the generic inference code combine that with nonnull.
// Changed is set only when the respective attribute was not already present.
4853 if (Call.hasRetAttr(Attribute::NonNull)) {
4854 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
4856 Call.getContext(), Size->getLimitedValue()));
4857 } else {
4858 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
4860 Call.getContext(), Size->getLimitedValue()));
4861 }
4862 }
4863
4864 // Add alignment attribute if alignment is a power of two constant.
4865 Value *Alignment = getAllocAlignment(&Call, TLI);
4866 if (!Alignment)
4867 return Changed;
4868
// The alignment must be a constant below Value::MaximumAlignment and a power
// of two, and it must exceed the existing return alignment to be applied.
4869 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
4870 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4871 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4872 if (llvm::isPowerOf2_64(AlignmentVal)) {
4873 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4874 Align NewAlign = Align(AlignmentVal);
4875 if (NewAlign > ExistingAlign) {
4878 Changed = true;
4879 }
4880 }
4881 }
4882 return Changed;
4883}
4884
4885/// Improvements for call, callbr and invoke instructions.
/// Returns a replacement instruction when one was created, &Call when the call
/// was modified in place, and nullptr otherwise.
/// NOTE(review): several lines of this function are absent from this listing
/// (the listing's own line numbers skip); comments below describe only what is
/// visible.
4886Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4887 bool Changed = annotateAnyAllocSite(Call, &TLI);
4888
4889 // Mark any parameters that are known to be non-null with the nonnull
4890 // attribute. This is helpful for inlining calls to functions with null
4891 // checks on their arguments.
4892 SmallVector<unsigned, 4> ArgNos;
4893 unsigned ArgNo = 0;
4894
4895 for (Value *V : Call.args()) {
4896 if (V->getType()->isPointerTy()) {
4897 // Simplify the nonnull operand if the parameter is known to be nonnull.
4898 // Otherwise, try to infer nonnull for it.
4899 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
4900 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4901 (HasDereferenceable &&
4903 V->getType()->getPointerAddressSpace()))) {
4904 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4905 replaceOperand(Call, ArgNo, Res);
4906 Changed = true;
4907 }
4908 } else if (isKnownNonZero(V,
4909 getSimplifyQuery().getWithInstruction(&Call))) {
4910 ArgNos.push_back(ArgNo);
4911 }
4912 }
4913 ArgNo++;
4914 }
4915
4916 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4917
// Add nonnull to every argument position collected above in one step.
4918 if (!ArgNos.empty()) {
4919 AttributeList AS = Call.getAttributes();
4920 LLVMContext &Ctx = Call.getContext();
4921 AS = AS.addParamAttribute(Ctx, ArgNos,
4922 Attribute::get(Ctx, Attribute::NonNull));
4923 Call.setAttributes(AS);
4924 Changed = true;
4925 }
4926
4927 // If the callee is a pointer to a function, attempt to move any casts to the
4928 // arguments of the call/callbr/invoke.
4930 Function *CalleeF = dyn_cast<Function>(Callee);
4931 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4932 transformConstExprCastCall(Call))
4933 return nullptr;
4934
4935 if (CalleeF) {
4936 // Remove the convergent attr on calls when the callee is not convergent.
4937 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4938 !CalleeF->isIntrinsic()) {
4939 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4940 << "\n");
4942 return &Call;
4943 }
4944
4945 // If the call and callee calling conventions don't match, and neither one
4946 // of the calling conventions is compatible with C calling convention
4947 // this call must be unreachable, as the call is undefined.
4948 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4949 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4953 // Only do this for calls to a function with a body. A prototype may
4954 // not actually end up matching the implementation's calling conv for a
4955 // variety of reasons (e.g. it may be written in assembly).
4956 !CalleeF->isDeclaration()) {
4957 Instruction *OldCall = &Call;
4959 // If OldCall does not return void then replaceInstUsesWith poison.
4960 // This allows ValueHandlers and custom metadata to adjust itself.
4961 if (!OldCall->getType()->isVoidTy())
4962 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4963 if (isa<CallInst>(OldCall))
4964 return eraseInstFromFunction(*OldCall);
4965
4966 // We cannot remove an invoke or a callbr, because it would change the
4967 // CFG, just change the callee to a null pointer.
4968 cast<CallBase>(OldCall)->setCalledFunction(
4969 CalleeF->getFunctionType(),
4970 Constant::getNullValue(CalleeF->getType()));
4971 return nullptr;
4972 }
4973 }
4974
4975 // Calling a null function pointer is undefined if a null address isn't
4976 // dereferenceable.
4977 if ((isa<ConstantPointerNull>(Callee) &&
4979 isa<UndefValue>(Callee)) {
4980 // If Call does not return void then replaceInstUsesWith poison.
4981 // This allows ValueHandlers and custom metadata to adjust itself.
4982 if (!Call.getType()->isVoidTy())
4984
4985 if (Call.isTerminator()) {
4986 // Can't remove an invoke or callbr because we cannot change the CFG.
4987 return nullptr;
4988 }
4989
4990 // This instruction is not reachable, just remove it.
4993 }
4994
// A callee that goes through a trampoline is handed to
// transformCallThroughTrampoline together with the init.trampoline call.
4995 if (IntrinsicInst *II = findInitTrampoline(Callee))
4996 return transformCallThroughTrampoline(Call, *II);
4997
4998 // Combine calls involving pointer authentication intrinsics.
4999 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
5000 return NewCall;
5001
5002 // Combine calls to ptrauth constants.
5003 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
5004 return NewCall;
5005
5006 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
5007 InlineAsm *IA = cast<InlineAsm>(Callee);
5008 if (!IA->canThrow()) {
5009 // Normal inline asm calls cannot throw - mark them
5010 // 'nounwind'.
5012 Changed = true;
5013 }
5014 }
5015
5016 // Try to optimize the call if possible, we require DataLayout for most of
5017 // this. None of these calls are seen as possibly dead so go ahead and
5018 // delete the instruction now.
5019 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
5020 Instruction *I = tryOptimizeCall(CI);
5021 // If we changed something return the result, etc. Otherwise let
5022 // the fallthrough check.
5023 if (I) return eraseInstFromFunction(*I);
5024 }
5025
// A call that has a returned-argument operand can have its uses replaced by
// that argument when the argument type casts losslessly to the call type;
// musttail calls and calls without uses are left alone.
5026 if (!Call.use_empty() && !Call.isMustTailCall())
5027 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
5028 Type *CallTy = Call.getType();
5029 Type *RetArgTy = ReturnedArg->getType();
5030 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
5031 return replaceInstUsesWith(
5032 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
5033 }
5034
5035 // Drop unnecessary callee_type metadata from calls that were converted
5036 // into direct calls.
5037 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
5038 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
5039 Changed = true;
5040 }
5041
5042 // Drop unnecessary kcfi operand bundles from calls that were converted
5043 // into direct calls.
5045 if (Bundle && !Call.isIndirectCall()) {
// Debug-only diagnostic: reports when the callee's kcfi_type metadata differs
// from the type recorded in the bundle.
5046 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
5047 if (CalleeF) {
5048 ConstantInt *FunctionType = nullptr;
5049 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
5050
5051 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
5052 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
5053
5054 if (FunctionType &&
5055 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
5056 dbgs() << Call.getModule()->getName()
5057 << ": warning: kcfi: " << Call.getCaller()->getName()
5058 << ": call to " << CalleeF->getName()
5059 << " using a mismatching function pointer type\n";
5060 }
5061 });
5062
5064 }
5065
5066 if (isRemovableAlloc(&Call, &TLI))
5067 return visitAllocSite(Call);
5068
5069 // Handle intrinsics which can be used in both call and invoke context.
5070 switch (Call.getIntrinsicID()) {
5071 case Intrinsic::experimental_gc_statepoint: {
5072 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
// Collects the base and derived pointers of the gc.relocates that reach the
// end of the loop body below without hitting a `continue`.
5073 SmallPtrSet<Value *, 32> LiveGcValues;
5074 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
5075 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
5076
5077 // Remove the relocation if unused.
5078 if (GCR.use_empty()) {
5080 continue;
5081 }
5082
5083 Value *DerivedPtr = GCR.getDerivedPtr();
5084 Value *BasePtr = GCR.getBasePtr();
5085
5086 // Undef is undef, even after relocation.
5087 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
5090 continue;
5091 }
5092
5093 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
5094 // The relocation of null will be null for most any collector.
5095 // TODO: provide a hook for this in GCStrategy. There might be some
5096 // weird collector this property does not hold for.
5097 if (isa<ConstantPointerNull>(DerivedPtr)) {
5098 // Use null-pointer of gc_relocate's type to replace it.
5101 continue;
5102 }
5103
5104 // isKnownNonNull -> nonnull attribute
5105 if (!GCR.hasRetAttr(Attribute::NonNull) &&
5106 isKnownNonZero(DerivedPtr,
5107 getSimplifyQuery().getWithInstruction(&Call))) {
5108 GCR.addRetAttr(Attribute::NonNull);
5109 // We discovered new fact, re-check users.
5110 Worklist.pushUsersToWorkList(GCR);
5111 }
5112 }
5113
5114 // If we have two copies of the same pointer in the statepoint argument
5115 // list, canonicalize to one. This may let us common gc.relocates.
5116 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
5117 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
5118 auto *OpIntTy = GCR.getOperand(2)->getType();
5119 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
5120 }
5121
5122 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
5123 // Canonicalize on the type from the uses to the defs
5124
5125 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
5126 LiveGcValues.insert(BasePtr);
5127 LiveGcValues.insert(DerivedPtr);
5128 }
5129 std::optional<OperandBundleUse> Bundle =
5131 unsigned NumOfGCLives = LiveGcValues.size();
// Nothing to shrink when there is no bundle or its size already equals the
// number of live values.
5132 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
5133 break;
5134 // We can reduce the size of gc live bundle.
// Val2Idx maps each bundle input to its index in the new bundle; inputs that
// are not live get the sentinel NumOfGCLives.
5135 DenseMap<Value *, unsigned> Val2Idx;
5136 std::vector<Value *> NewLiveGc;
5137 for (Value *V : Bundle->Inputs) {
5138 auto [It, Inserted] = Val2Idx.try_emplace(V);
5139 if (!Inserted)
5140 continue;
5141 if (LiveGcValues.count(V)) {
5142 It->second = NewLiveGc.size();
5143 NewLiveGc.push_back(V);
5144 } else
5145 It->second = NumOfGCLives;
5146 }
5147 // Update all gc.relocates
5148 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
5149 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
5150 Value *BasePtr = GCR.getBasePtr();
5151 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
5152 "Missed live gc for base pointer");
5153 auto *OpIntTy1 = GCR.getOperand(1)->getType();
5154 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
5155 Value *DerivedPtr = GCR.getDerivedPtr();
5156 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
5157 "Missed live gc for derived pointer");
5158 auto *OpIntTy2 = GCR.getOperand(2)->getType();
5159 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
5160 }
5161 // Create new statepoint instruction.
5162 OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
5163 return CallBase::Create(&Call, NewBundle);
5164 }
5165 default: { break; }
5166 }
5167
5168 return Changed ? &Call : nullptr;
5169}
5170
5171/// If the callee is a constexpr cast of a function, attempt to move the cast to
5172/// the arguments of the call/invoke.
5173/// CallBrInst is not supported.
5174bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
5175 auto *Callee =
5177 if (!Callee)
5178 return false;
5179
5181 "CallBr's don't have a single point after a def to insert at");
5182
5183 // Don't perform the transform for declarations, which may not be fully
5184 // accurate. For example, void @foo() is commonly used as a placeholder for
5185 // unknown prototypes.
5186 if (Callee->isDeclaration())
5187 return false;
5188
5189 // If this is a call to a thunk function, don't remove the cast. Thunks are
5190 // used to transparently forward all incoming parameters and outgoing return
5191 // values, so it's important to leave the cast in place.
5192 if (Callee->hasFnAttribute("thunk"))
5193 return false;
5194
5195 // If this is a call to a naked function, the assembly might be
5196 // using an argument, or otherwise rely on the frame layout,
5197 // the function prototype will mismatch.
5198 if (Callee->hasFnAttribute(Attribute::Naked))
5199 return false;
5200
5201 // If this is a musttail call, the callee's prototype must match the caller's
5202 // prototype with the exception of pointee types. The code below doesn't
5203 // implement that, so we can't do this transform.
5204 // TODO: Do the transform if it only requires adding pointer casts.
5205 if (Call.isMustTailCall())
5206 return false;
5207
5209 const AttributeList &CallerPAL = Call.getAttributes();
5210
5211 // Okay, this is a cast from a function to a different type. Unless doing so
5212 // would cause a type conversion of one of our arguments, change this call to
5213 // be a direct call with arguments casted to the appropriate types.
5214 FunctionType *FT = Callee->getFunctionType();
5215 Type *OldRetTy = Caller->getType();
5216 Type *NewRetTy = FT->getReturnType();
5217
5218 // Check to see if we are changing the return type...
5219 if (OldRetTy != NewRetTy) {
5220
5221 if (NewRetTy->isStructTy())
5222 return false; // TODO: Handle multiple return values.
5223
5224 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
5225 if (!Caller->use_empty())
5226 return false; // Cannot transform this return value.
5227 }
5228
5229 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
5230 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5231 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
5232 NewRetTy, CallerPAL.getRetAttrs())))
5233 return false; // Attribute not compatible with transformed value.
5234 }
5235
5236 // If the callbase is an invoke instruction, and the return value is
5237 // used by a PHI node in a successor, we cannot change the return type of
5238 // the call because there is no place to put the cast instruction (without
5239 // breaking the critical edge). Bail out in this case.
5240 if (!Caller->use_empty()) {
5241 BasicBlock *PhisNotSupportedBlock = nullptr;
5242 if (auto *II = dyn_cast<InvokeInst>(Caller))
5243 PhisNotSupportedBlock = II->getNormalDest();
5244 if (PhisNotSupportedBlock)
5245 for (User *U : Caller->users())
5246 if (PHINode *PN = dyn_cast<PHINode>(U))
5247 if (PN->getParent() == PhisNotSupportedBlock)
5248 return false;
5249 }
5250 }
5251
5252 unsigned NumActualArgs = Call.arg_size();
5253 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
5254
5255 // Prevent us turning:
5256 // declare void @takes_i32_inalloca(i32* inalloca)
5257 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
5258 //
5259 // into:
5260 // call void @takes_i32_inalloca(i32* null)
5261 //
5262 // Similarly, avoid folding away bitcasts of byval calls.
5263 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
5264 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
5265 return false;
5266
5267 auto AI = Call.arg_begin();
5268 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
5269 Type *ParamTy = FT->getParamType(i);
5270 Type *ActTy = (*AI)->getType();
5271
5272 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
5273 return false; // Cannot transform this parameter value.
5274
5275 // Check if there are any incompatible attributes we cannot drop safely.
5276 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
5277 .overlaps(AttributeFuncs::typeIncompatible(
5278 ParamTy, CallerPAL.getParamAttrs(i),
5279 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5280 return false; // Attribute not compatible with transformed value.
5281
5282 if (Call.isInAllocaArgument(i) ||
5283 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
5284 return false; // Cannot transform to and from inalloca/preallocated.
5285
5286 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
5287 return false;
5288
5289 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
5290 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
5291 return false; // Cannot transform to or from byval.
5292 }
5293
5294 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5295 !CallerPAL.isEmpty()) {
5296 // In this case we have more arguments than the new function type, but we
5297 // won't be dropping them. Check that these extra arguments have attributes
5298 // that are compatible with being a vararg call argument.
5299 unsigned SRetIdx;
5300 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
5301 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5302 return false;
5303 }
5304
5305 // Okay, we decided that this is a safe thing to do: go ahead and start
5306 // inserting cast instructions as necessary.
5307 SmallVector<Value *, 8> Args;
5309 Args.reserve(NumActualArgs);
5310 ArgAttrs.reserve(NumActualArgs);
5311
5312 // Get any return attributes.
5313 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5314
5315 // If the return value is not being used, the type may not be compatible
5316 // with the existing attributes. Wipe out any problematic attributes.
5317 RAttrs.remove(
5318 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
5319
5320 LLVMContext &Ctx = Call.getContext();
5321 AI = Call.arg_begin();
5322 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5323 Type *ParamTy = FT->getParamType(i);
5324
5325 Value *NewArg = *AI;
5326 if ((*AI)->getType() != ParamTy)
5327 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
5328 Args.push_back(NewArg);
5329
5330 // Add any parameter attributes except the ones incompatible with the new
5331 // type. Note that we made sure all incompatible ones are safe to drop.
5332 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5333 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
5334 ArgAttrs.push_back(
5335 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
5336 }
5337
5338 // If the function takes more arguments than the call was taking, add them
5339 // now.
5340 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5341 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
5342 ArgAttrs.push_back(AttributeSet());
5343 }
5344
5345 // If we are removing arguments to the function, emit an obnoxious warning.
5346 if (FT->getNumParams() < NumActualArgs) {
5347 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5348 if (FT->isVarArg()) {
5349 // Add all of the arguments in their promoted form to the arg list.
5350 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5351 Type *PTy = getPromotedType((*AI)->getType());
5352 Value *NewArg = *AI;
5353 if (PTy != (*AI)->getType()) {
5354 // Must promote to pass through va_arg area!
5355 Instruction::CastOps opcode =
5356 CastInst::getCastOpcode(*AI, false, PTy, false);
5357 NewArg = Builder.CreateCast(opcode, *AI, PTy);
5358 }
5359 Args.push_back(NewArg);
5360
5361 // Add any parameter attributes.
5362 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
5363 }
5364 }
5365 }
5366
5367 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5368
5369 if (NewRetTy->isVoidTy())
5370 Caller->setName(""); // Void type should not have a name.
5371
5372 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
5373 "missing argument attributes");
5374 AttributeList NewCallerPAL = AttributeList::get(
5375 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
5376
5378 Call.getOperandBundlesAsDefs(OpBundles);
5379
5380 CallBase *NewCall;
5381 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5382 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
5383 II->getUnwindDest(), Args, OpBundles);
5384 } else {
5385 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5386 cast<CallInst>(NewCall)->setTailCallKind(
5387 cast<CallInst>(Caller)->getTailCallKind());
5388 }
5389 NewCall->takeName(Caller);
5391 NewCall->setAttributes(NewCallerPAL);
5392
5393 // Preserve prof metadata if any.
5394 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
5395
5396 // Insert a cast of the return type as necessary.
5397 Instruction *NC = NewCall;
5398 Value *NV = NC;
5399 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5400 assert(!NV->getType()->isVoidTy());
5402 NC->setDebugLoc(Caller->getDebugLoc());
5403
5404 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5405 assert(OptInsertPt && "No place to insert cast");
5406 InsertNewInstBefore(NC, *OptInsertPt);
5407 Worklist.pushUsersToWorkList(*Caller);
5408 }
5409
5410 if (!Caller->use_empty())
5411 replaceInstUsesWith(*Caller, NV);
5412 else if (Caller->hasValueHandle()) {
5413 if (OldRetTy == NV->getType())
5415 else
5416 // We cannot call ValueIsRAUWd with a different type, and the
5417 // actual tracked value will disappear.
5419 }
5420
5421 eraseInstFromFunction(*Caller);
5422 return true;
5423}
5424
5425/// Turn a call to a function created by init_trampoline / adjust_trampoline
5426/// intrinsic pair into a direct call to the underlying function.
///
/// \param Call  The call site being rewritten; its function type and
///              attributes are read, and it may be retargeted in place.
/// \param Tramp The trampoline intrinsic; its operand 2 supplies the chain
///              ('nest') value.
/// \returns nullptr when \p Call already carries a 'nest' attribute; a newly
///          created call/invoke/callbr when the target has a 'nest'
///          parameter; otherwise \p Call itself after its callee was replaced.
// NOTE(review): this listing has no line carrying the return type of the
// definition (the embedded numbering skips from 5426 to 5428); the values
// returned below are nullptr, an Instruction * and &Call.
5428InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5429 IntrinsicInst &Tramp) {
5430 FunctionType *FTy = Call.getFunctionType();
5431 AttributeList Attrs = Call.getAttributes();
5432
5433 // If the call already has the 'nest' attribute somewhere then give up -
5434 // otherwise 'nest' would occur twice after splicing in the chain.
5435 if (Attrs.hasAttrSomewhere(Attribute::Nest))
5436 return nullptr;
5437
// NOTE(review): NestF is used below without a visible declaration (the
// embedded numbering skips 5438); presumably it is the function the
// trampoline was initialised with - confirm against the full file.
5439 FunctionType *NestFTy = NestF->getFunctionType();
5440
5441 AttributeList NestAttrs = NestF->getAttributes();
5442 if (!NestAttrs.isEmpty()) {
// State filled in by the search below: index, type and attribute set of the
// first parameter of NestF that is marked 'nest'. NestTy stays null when no
// such parameter exists.
5443 unsigned NestArgNo = 0;
5444 Type *NestTy = nullptr;
5445 AttributeSet NestAttr;
5446
5447 // Look for a parameter marked with the 'nest' attribute.
5448 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5449 E = NestFTy->param_end();
5450 I != E; ++NestArgNo, ++I) {
5451 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
5452 if (AS.hasAttribute(Attribute::Nest)) {
5453 // Record the parameter type and any other attributes.
5454 NestTy = *I;
5455 NestAttr = AS;
5456 break;
5457 }
5458 }
5459
5460 if (NestTy) {
5461 std::vector<Value*> NewArgs;
5462 std::vector<AttributeSet> NewArgAttrs;
5463 NewArgs.reserve(Call.arg_size() + 1);
// NOTE(review): NewArgAttrs can receive one entry more than is reserved
// here, because the nest attributes are pushed in addition to the
// per-argument ones; NewArgs above reserves arg_size() + 1.
5464 NewArgAttrs.reserve(Call.arg_size());
5465
5466 // Insert the nest argument into the call argument list, which may
5467 // mean appending it. Likewise for attributes.
5468
5469 {
5470 unsigned ArgNo = 0;
5471 auto I = Call.arg_begin(), E = Call.arg_end();
// The insertion test runs before the end-of-list test so that a nest
// position equal to the number of call arguments appends the chain value.
// NOTE(review): if NestArgNo is larger than the number of call arguments
// the loop exits without ever adding the chain value - confirm that the
// later type-mismatch handling covers this.
5472 do {
5473 if (ArgNo == NestArgNo) {
5474 // Add the chain argument and attributes.
5475 Value *NestVal = Tramp.getArgOperand(2);
5476 if (NestVal->getType() != NestTy)
5477 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
5478 NewArgs.push_back(NestVal);
5479 NewArgAttrs.push_back(NestAttr);
5480 }
5481
5482 if (I == E)
5483 break;
5484
5485 // Add the original argument and attributes.
5486 NewArgs.push_back(*I);
5487 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
5488
5489 ++ArgNo;
5490 ++I;
5491 } while (true);
5492 }
5493
5494 // The trampoline may have been bitcast to a bogus type (FTy).
5495 // Handle this by synthesizing a new function type, equal to FTy
5496 // with the chain parameter inserted.
5497
5498 std::vector<Type*> NewTypes;
5499 NewTypes.reserve(FTy->getNumParams()+1);
5500
5501 // Insert the chain's type into the list of parameter types, which may
5502 // mean appending it.
5503 {
5504 unsigned ArgNo = 0;
5505 FunctionType::param_iterator I = FTy->param_begin(),
5506 E = FTy->param_end();
5507
// Same walk as for the argument list above, but over the parameter types of
// FTy instead of the call's argument operands.
5508 do {
5509 if (ArgNo == NestArgNo)
5510 // Add the chain's type.
5511 NewTypes.push_back(NestTy);
5512
5513 if (I == E)
5514 break;
5515
5516 // Add the original type.
5517 NewTypes.push_back(*I);
5518
5519 ++ArgNo;
5520 ++I;
5521 } while (true);
5522 }
5523
5524 // Replace the trampoline call with a direct call. Let the generic
5525 // code sort out any function type mismatches.
// The new type keeps FTy's return type and vararg-ness. The new attribute
// list keeps the call's function and return attributes and swaps in the
// per-argument attributes assembled above.
5526 FunctionType *NewFTy =
5527 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
5528 AttributeList NewPAL =
5529 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
5530 Attrs.getRetAttrs(), NewArgAttrs);
5531
// NOTE(review): OpBundles is used here without a visible declaration (the
// embedded numbering skips 5532) - confirm against the full file.
5533 Call.getOperandBundlesAsDefs(OpBundles);
5534
// Create the same kind of instruction as the original (invoke, callbr or
// plain call) targeting NestF. Each branch copies the calling convention and
// installs NewPAL; the plain-call branch also copies the tail-call kind.
5535 Instruction *NewCaller;
5536 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
5537 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
5538 II->getUnwindDest(), NewArgs, OpBundles);
5539 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
5540 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
5541 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
5542 NewCaller =
5543 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
5544 CBI->getIndirectDests(), NewArgs, OpBundles);
5545 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
5546 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
5547 } else {
5548 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
5549 cast<CallInst>(NewCaller)->setTailCallKind(
5550 cast<CallInst>(Call).getTailCallKind());
5551 cast<CallInst>(NewCaller)->setCallingConv(
5552 cast<CallInst>(Call).getCallingConv());
5553 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
5554 }
// The replacement reports the same source location as the original call.
5555 NewCaller->setDebugLoc(Call.getDebugLoc());
5556
5557 return NewCaller;
5558 }
5559 }
5560
5561 // Replace the trampoline call with a direct call. Since there is no 'nest'
5562 // parameter, there is no need to adjust the argument list. Let the generic
5563 // code sort out any function type mismatches.
// Reached when NestF has no attributes at all or none of its parameters is
// marked 'nest'; the existing call keeps its function type FTy.
5564 Call.setCalledFunction(FTy, NestF);
5565 return &Call;
5566}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
BitTracker BT
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
#define Check(C,...)
#define DEBUG_TYPE
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass through a va_arg area.
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
static void referenceAspect(StringRef Aspect, StringRef ImplName, Module *M, IRBuilderBase &B)
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
static bool inputDenormalIsDAZ(const Function &F, const Type *Ty)
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID)
Helper to match idempotent binary intrinsics, namely, intrinsics where f(f(x, y), y) == f(x,...
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, const SimplifyQuery &SQ)
Return true if two values Op0 and Op1 are known to have the same sign.
static Value * optimizeModularFormat(CallInst *CI, IRBuilderBase &B)
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
static std::optional< bool > getKnownSign(Value *Op, const SimplifyQuery &SQ)
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
static Instruction * factorizeMinMaxTree(IntrinsicInst *II)
Reduce a sequence of min/max intrinsics with a common operand.
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
static Value * simplifyReductionOperand(Value *Arg, bool CanReorderLanes)
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
static bool isAspectNeeded(StringRef Aspect, CallInst *CI, std::optional< unsigned > FirstArgIdx, const std::optional< Bitset< 256 > > &Specifiers)
static Value * foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
static std::optional< bool > getKnownSignOrZero(Value *Op, const SimplifyQuery &SQ)
static Value * foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, const DataLayout &DL, InstCombiner::BuilderTy &Builder)
Fold an unsigned minimum of trailing or leading zero bits counts: umin(cttz(CtOp1,...
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static Value * foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, IntrinsicInst *II)
Attempt to simplify value-accumulating recurrences of kind: umax.acc = phi i8 [ umax,...
static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy)
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
static Instruction * simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC, bool IsExtension)
Convert tbl/tbx intrinsics to shufflevector if the mask is constant, and at most two source operands ...
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC)
static IntrinsicInst * findInitTrampoline(Value *Callee)
static Bitset< 256 > parseFormatStringSpecifiers(StringRef FormatStr)
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty)
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static Value * reassociateMinMaxWithConstants(IntrinsicInst *II, IRBuilderBase &Builder, const SimplifyQuery &SQ)
If this min/max has a constant operand and an operand that is a matching min/max with a constant oper...
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
static Instruction * foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC)
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static bool inputDenormalIsIEEE(DenormalMode Mode)
Return true if it's possible to assume IEEE treatment of input denormals in F for Val.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:262
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:275
bool isNegative() const
Definition APFloat.h:1544
void clearSign()
Definition APFloat.h:1363
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1153
bool isZero() const
Definition APFloat.h:1540
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1203
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1213
bool isInfinity() const
Definition APFloat.h:1541
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1983
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1692
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1963
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1970
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1084
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2071
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1597
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1976
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition APSInt.h:310
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition APSInt.h:302
This class represents any memset intrinsic.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:194
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:407
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI bool isSigned() const
Whether the intrinsic is signed or unsigned.
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
static BinaryOperator * CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:271
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:314
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:329
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:279
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:283
static BinaryOperator * CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:275
static LLVM_ABI BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
This is a constexpr reimplementation of a subset of std::bitset.
Definition Bitset.h:30
constexpr bool any() const
Definition Bitset.h:113
constexpr Bitset & set()
Definition Bitset.h:81
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setDoesNotThrow()
MaybeAlign getRetAlign() const
Extract the alignment of the return value.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
OperandBundleUse getOperandBundleAt(unsigned Index) const
Return the operand bundle at a specific index.
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
unsigned getNumOperandBundles() const
Return the number of operand bundles associated with this User.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
static LLVM_ABI CallBase * removeOperandBundleAt(CallBase *CB, size_t Offset, InsertPosition InsertPtr=nullptr)
void setNotConvergent()
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
bool doesNotThrow() const
Determine if the call cannot unwind.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
bool isConvergent() const
Determine if the invoke is convergent.
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getReturnedArgOperand() const
If one of the arguments has the 'returned' attribute, returns its operand value.
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock * > IndirectDests, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
bool isNoTailCall() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:743
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:748
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:751
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:747
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:756
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:934
Predicate getUnorderedPredicate() const
Definition InstrTypes.h:874
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI ConstantPtrAuth * get(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc, Constant *DeactivationSymbol)
Return a pointer signed with the specified parameters.
This class represents a range of values.
LLVM_ABI ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other?
LLVM_ABI ConstantRange multiply(const ConstantRange &Other, unsigned NoWrapKind=0) const
Return a new range representing the possible values resulting from a multiplication of a value in thi...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
unsigned size() const
Definition DenseMap.h:174
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:221
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:216
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static FMFSource intersect(Value *A, Value *B)
Intersect the FMF from two instructions.
Definition IRBuilder.h:107
This class represents an extension of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
bool allowReassoc() const
Flag queries.
Definition FMF.h:64
An instruction for ordering other memory operations.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this fence instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type::subtype_iterator param_iterator
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool isConvergent() const
Determine if the call is convergent.
Definition Function.h:592
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:328
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:576
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:251
LLVM_ABI Value * getBasePtr() const
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
LLVM_ABI Value * getDerivedPtr() const
unsigned getDerivedPtrIndex() const
The index into the associated statepoint's argument list which contains the pointer whose relocation t...
std::vector< const GCRelocateInst * > getGCRelocates() const
Get list of all gc relocates linked to this statepoint. May contain several relocations for the same b...
Definition Statepoint.h:206
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this GlobalObject.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:346
PointerType * getType() const
Global values are always pointers.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:457
LLVM_ABI Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1439
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2110
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2639
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:462
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2474
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2237
LLVM_ABI Value * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
LLVM_ABI Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * SimplifyAnyMemSet(AnyMemSetInst *MI)
Instruction * foldItoFPtoI(FPToIntTy &FI)
fpto{s/u}i.sat --> X or zext(X) or sext(X) or trunc(X). This is safe if the intermediate type has enou...
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitCallBrInst(CallBrInst &CBI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Value * foldReversedIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are reverses, try to pull the reverse after the intrinsic.
Value * tryGetLog2(Value *Op, bool AssumeNonZero)
Instruction * visitFenceInst(FenceInst &FI)
Instruction * foldShuffledIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are unary shuffles with the same mask, try to shuffle after the int...
Instruction * visitInvokeInst(InvokeInst &II)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Instruction * visitVAEndInst(VAEndInst &I)
Instruction * matchBSwapOrBitReverse(Instruction &I, bool MatchBSwaps, bool MatchBitReversals)
Given an initial instruction, check to see if it is the root of a bswap/bitreverse idiom.
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, poison, ShMask) = C for lanes that select NewC.
Instruction * visitAllocSite(Instruction &FI)
Instruction * SimplifyAnyMemTransfer(AnyMemTransferInst *MI)
OverflowResult computeOverflow(Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, Instruction *CxtI) const
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
DominatorTree & getDominatorTree() const
BlockFrequencyInfo * BFI
TargetLibraryInfo & TLI
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
const DataLayout & DL
DomConditionCache DC
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
LLVM_ABI std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
ProfileSummaryInfo * PSI
OptimizationRemarkEmitter & ORE
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
const SimplifyQuery & getSimplifyQuery() const
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI std::optional< InstListType::iterator > getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:348
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1069
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1554
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
bool isSigned() const
Whether the intrinsic is signed or unsigned.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
StringRef getName() const
Get a short "name" for the module.
Definition Module.h:271
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:113
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:107
bool isCommutative() const
Return true if the instruction is commutative.
Definition Operator.h:130
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Represents a saturating add/sub intrinsic.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
static constexpr size_t npos
Definition StringRef.h:58
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
LLVM_ABI size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Class to represent struct types.
static LLVM_ABI bool isCallingConvCCompatible(CallBase *CI)
Returns true if call site / callee has cdecl-compatible calling conventions.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI bool canLosslesslyBitCastTo(Type *Ty) const
Return true if this type could be converted with a lossless BitCast to type 'Ty'.
Definition Type.cpp:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:276
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:148
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:156
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:36
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
This represents the llvm.va_end intrinsic.
static LLVM_ABI void ValueIsDeleted(Value *V)
Definition Value.cpp:1263
static LLVM_ABI void ValueIsRAUWd(Value *Old, Value *New)
Definition Value.cpp:1316
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
static constexpr uint64_t MaximumAlignment
Definition Value.h:799
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:346
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:798
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
auto m_UMin(const Opnd0 &Op0, const Opnd1 &Op1)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
auto m_SMax(const Opnd0 &Op0, const Opnd1 &Op1)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_UMax(const Opnd0 &Op0, const Opnd1 &Op1)
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
auto m_Constant()
Match an arbitrary Constant and ignore it.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match an integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
auto m_c_MaxOrMin(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
auto m_SMin(const Opnd0 &Op0, const Opnd1 &Op1)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
auto m_UnOp()
Match an arbitrary unary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_MaxOrMin(const Opnd0 &Op0, const Opnd1 &Op1)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:204
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:573
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
BundleAttr getBundleAttrFromOBU(OperandBundleUse OBU)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI bool getConstantStringInfo(const Value *V, StringRef &Str, bool TrimAtNul=true)
This function retrieves the null-terminated C string pointed to by V into Str, returning true on success.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI AssumeSeparateStorageInfo getAssumeSeparateStorageInfo(OperandBundleUse)
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1762
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew modulo Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:253
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1717
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_NABS
Negated absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_READONLY APFloat minimumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimumNumber semantics.
Definition APFloat.h:1748
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1662
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1777
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1698
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI AssumeNonNullInfo getAssumeNonNullInfo(OperandBundleUse)
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
Definition InstrProf.h:145
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
constexpr unsigned BitWidth
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
LLVM_ABI AssumeAlignInfo getAssumeAlignInfo(OperandBundleUse)
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const SimplifyQuery &Q, bool IgnoreFree=false)
Equivalent to isDereferenceableAndAlignedPointer with an alignment of 1.
Definition Loads.cpp:264
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1735
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negation.
LLVM_READONLY APFloat maximumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximumNumber semantics.
Definition APFloat.h:1775
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI AssumeDereferenceableInfo getAssumeDereferenceableInfo(OperandBundleUse)
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI AssumeNoUndefInfo getAssumeNoUndefInfo(OperandBundleUse)
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if we can prove that the specified FP value's sign bit is 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:288
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:109
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:294
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const