LLVM 23.0.0git
AMDGPULibCalls.cpp
Go to the documentation of this file.
1//===- AMDGPULibCalls.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file does AMD library function optimizations.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPULibFunc.h"
20#include "llvm/IR/Dominators.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/IntrinsicsAMDGPU.h"
23#include "llvm/IR/MDBuilder.h"
25#include <cmath>
26
27#define DEBUG_TYPE "amdgpu-simplifylib"
28
29using namespace llvm;
30using namespace llvm::PatternMatch;
31
32static cl::opt<bool> EnablePreLink("amdgpu-prelink",
33 cl::desc("Enable pre-link mode optimizations"),
34 cl::init(false),
36
37static cl::list<std::string> UseNative("amdgpu-use-native",
38 cl::desc("Comma separated list of functions to replace with native, or all"),
41
42#define MATH_PI numbers::pi
43#define MATH_E numbers::e
44#define MATH_SQRT2 numbers::sqrt2
45#define MATH_SQRT1_2 numbers::inv_sqrt2
46
// Which member of the pow family a fast-math expansion is for: pow (FP
// exponent), powr (base assumed non-negative), pown (integer exponent), or
// rootn (integer root).
enum class PowKind { Pow, PowR, PowN, RootN };
48
49namespace llvm {
50
52private:
54
55 using FuncInfo = llvm::AMDGPULibFunc;
56
57 // -fuse-native.
58 bool AllNative = false;
59
60 bool useNativeFunc(const StringRef F) const;
61
62 // Return a pointer (pointer expr) to the function if function definition with
63 // "FuncName" exists. It may create a new function prototype in pre-link mode.
64 FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
65
66 /// Wrapper around getFunction which tries to use a faster variant if
67 /// available, and falls back to a less fast option.
68 ///
69 /// Return a replacement function for \p fInfo that has float-typed fast
70 /// variants. \p NewFunc is a base replacement function to use. \p
71 /// NewFuncFastVariant is a faster version to use if the calling context knows
72 /// it's legal. If there is no fast variant to use, \p NewFuncFastVariant
73 /// should be EI_NONE.
74 FunctionCallee getFloatFastVariant(Module *M, const FuncInfo &fInfo,
75 FuncInfo &newInfo,
77 AMDGPULibFunc::EFuncId NewFuncFastVariant);
78
79 bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
80
81 bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
82
83 /* Specialized optimizations */
84
85 // pow/powr/pown
86 bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
87
88 /// Peform a fast math expansion of pow, powr, pown or rootn.
89 bool expandFastPow(FPMathOperator *FPOp, IRBuilder<> &B, PowKind Kind);
90
91 bool tryOptimizePow(FPMathOperator *FPOp, IRBuilder<> &B,
92 const FuncInfo &FInfo);
93
94 // rootn
95 bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
96
97 // -fuse-native for sincos
98 bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
99
100 // evaluate calls if calls' arguments are constants.
101 bool evaluateScalarMathFunc(const FuncInfo &FInfo, APFloat &Res0,
102 APFloat &Res1, Constant *copr0, Constant *copr1);
103 bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
104
105 /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value
106 /// of cos, sincos call).
107 std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
108 FastMathFlags FMF,
109 IRBuilder<> &B,
110 FunctionCallee Fsincos);
111
112 // sin/cos
113 bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
114
115 // __read_pipe/__write_pipe
116 bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
117 const FuncInfo &FInfo);
118
119 // Get a scalar native builtin single argument FP function
120 FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
121
122 /// Substitute a call to a known libcall with an intrinsic call. If \p
123 /// AllowMinSize is true, allow the replacement in a minsize function.
124 bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
125 bool AllowMinSizeF32 = false,
126 bool AllowF64 = false,
127 bool AllowStrictFP = false);
128 void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
129 Intrinsic::ID IntrID);
130
131 bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
132 Intrinsic::ID IntrID,
133 bool AllowMinSizeF32 = false,
134 bool AllowF64 = false,
135 bool AllowStrictFP = false);
136
137protected:
138 bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const;
139
141
  // Replace all uses of call \p I with \p With, then delete the now-dead
  // original instruction.
  static void replaceCall(Instruction *I, Value *With) {
    I->replaceAllUsesWith(With);
    I->eraseFromParent();
  }
146
147 static void replaceCall(FPMathOperator *I, Value *With) {
149 }
150
151public:
153
154 bool fold(CallInst *CI);
155
156 void initNativeFuncs();
157
158 // Replace a normal math function call with that native version
159 bool useNative(CallInst *CI);
160};
161
162} // end namespace llvm
163
164template <typename IRB>
165static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
166 const Twine &Name = "") {
167 CallInst *R = B.CreateCall(Callee, Arg, Name);
168 if (Function *F = dyn_cast<Function>(Callee.getCallee()))
169 R->setCallingConv(F->getCallingConv());
170 return R;
171}
172
173template <typename IRB>
174static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
175 Value *Arg2, const Twine &Name = "") {
176 CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
177 if (Function *F = dyn_cast<Function>(Callee.getCallee()))
178 R->setCallingConv(F->getCallingConv());
179 return R;
180}
181
183 Type *PowNExpTy = Type::getInt32Ty(FT->getContext());
184 if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
185 PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount());
186
187 return FunctionType::get(FT->getReturnType(),
188 {FT->getParamType(0), PowNExpTy}, false);
189}
190
191// Data structures for table-driven optimizations.
192// FuncTbl works for both f32 and f64 functions with 1 input argument
193
195 double result;
196 double input;
197};
198
199/* a list of {result, input} */
// Exact-value folding tables consumed by TDOFold. Each row is {result, input}:
// when the call's argument compares exactly equal to `input`, the call is
// replaced by the constant `result`. Signed zeros are listed separately so
// sign-preserving functions fold -0.0 correctly.
static const TableEntry tbl_acos[] = {
  {MATH_PI / 2.0, 0.0},
  {MATH_PI / 2.0, -0.0},
  {0.0, 1.0},
  {MATH_PI, -1.0}
};
static const TableEntry tbl_acosh[] = {
  {0.0, 1.0}
};
static const TableEntry tbl_acospi[] = {
  {0.5, 0.0},
  {0.5, -0.0},
  {0.0, 1.0},
  {1.0, -1.0}
};
static const TableEntry tbl_asin[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {MATH_PI / 2.0, 1.0},
  {-MATH_PI / 2.0, -1.0}
};
static const TableEntry tbl_asinh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_asinpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {0.5, 1.0},
  {-0.5, -1.0}
};
static const TableEntry tbl_atan[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {MATH_PI / 4.0, 1.0},
  {-MATH_PI / 4.0, -1.0}
};
static const TableEntry tbl_atanh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_atanpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {0.25, 1.0},
  {-0.25, -1.0}
};
static const TableEntry tbl_cbrt[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {1.0, 1.0},
  {-1.0, -1.0},
};
static const TableEntry tbl_cos[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_cosh[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_cospi[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_erfc[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_erf[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_exp[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {MATH_E, 1.0}
};
static const TableEntry tbl_exp2[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {2.0, 1.0}
};
static const TableEntry tbl_exp10[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {10.0, 1.0}
};
static const TableEntry tbl_expm1[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_log[] = {
  {0.0, 1.0},
  {1.0, MATH_E}
};
static const TableEntry tbl_log2[] = {
  {0.0, 1.0},
  {1.0, 2.0}
};
static const TableEntry tbl_log10[] = {
  {0.0, 1.0},
  {1.0, 10.0}
};
static const TableEntry tbl_rsqrt[] = {
  {1.0, 1.0},
  {MATH_SQRT1_2, 2.0}
};
static const TableEntry tbl_sin[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sinh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sinpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sqrt[] = {
  {0.0, 0.0},
  {1.0, 1.0},
  {MATH_SQRT2, 2.0}
};
static const TableEntry tbl_tan[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tanh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tanpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
// tgamma(n) = (n-1)! for small positive integers.
static const TableEntry tbl_tgamma[] = {
  {1.0, 1.0},
  {1.0, 2.0},
  {2.0, 3.0},
  {6.0, 4.0}
};
343
345 switch(id) {
361 return true;
362 default:;
363 }
364 return false;
365}
366
368
370 switch(id) {
408 default:;
409 }
410 return TableRef();
411}
412
413static inline int getVecSize(const AMDGPULibFunc& FInfo) {
414 return FInfo.getLeads()[0].VectorSize;
415}
416
417static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
418 return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
419}
420
421FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
422 // If we are doing PreLinkOpt, the function is external. So it is safe to
423 // use getOrInsertFunction() at this stage.
424
426 : AMDGPULibFunc::getFunction(M, fInfo);
427}
428
429FunctionCallee AMDGPULibCalls::getFloatFastVariant(
430 Module *M, const FuncInfo &fInfo, FuncInfo &newInfo,
431 AMDGPULibFunc::EFuncId NewFunc, AMDGPULibFunc::EFuncId FastVariant) {
432 assert(NewFunc != FastVariant);
433
434 if (FastVariant != AMDGPULibFunc::EI_NONE &&
435 getArgType(fInfo) == AMDGPULibFunc::F32) {
436 newInfo = AMDGPULibFunc(FastVariant, fInfo);
437 if (FunctionCallee NewCallee = getFunction(M, newInfo))
438 return NewCallee;
439 }
440
441 newInfo = AMDGPULibFunc(NewFunc, fInfo);
442 return getFunction(M, newInfo);
443}
444
// Thin wrapper: decode the mangled library function name \p FMangledName into
// \p FInfo, forwarding AMDGPULibFunc::parse's success result.
bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
                                       FuncInfo &FInfo) {
  return AMDGPULibFunc::parse(FMangledName, FInfo);
}
449
451 return FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs();
452}
453
455 const FPMathOperator *FPOp) const {
456 // TODO: Refine to approxFunc or contract
457 return FPOp->isFast();
458}
459
461 : SQ(F.getParent()->getDataLayout(),
462 &FAM.getResult<TargetLibraryAnalysis>(F),
463 FAM.getCachedResult<DominatorTreeAnalysis>(F),
464 &FAM.getResult<AssumptionAnalysis>(F)) {}
465
466bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
467 return AllNative || llvm::is_contained(UseNative, F);
468}
469
471 AllNative = useNativeFunc("all") ||
472 (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
473 UseNative.begin()->empty());
474}
475
476bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
477 bool native_sin = useNativeFunc("sin");
478 bool native_cos = useNativeFunc("cos");
479
480 if (native_sin && native_cos) {
481 Module *M = aCI->getModule();
482 Value *opr0 = aCI->getArgOperand(0);
483
484 AMDGPULibFunc nf;
485 nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
486 nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
487
490 FunctionCallee sinExpr = getFunction(M, nf);
491
494 FunctionCallee cosExpr = getFunction(M, nf);
495 if (sinExpr && cosExpr) {
496 Value *sinval =
497 CallInst::Create(sinExpr, opr0, "splitsin", aCI->getIterator());
498 Value *cosval =
499 CallInst::Create(cosExpr, opr0, "splitcos", aCI->getIterator());
500 new StoreInst(cosval, aCI->getArgOperand(1), aCI->getIterator());
501
502 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
503 << " with native version of sin/cos");
504
505 replaceCall(aCI, sinval);
506 return true;
507 }
508 }
509 return false;
510}
511
513 Function *Callee = aCI->getCalledFunction();
514 if (!Callee || aCI->isNoBuiltin())
515 return false;
516
517 FuncInfo FInfo;
518 if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
519 FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
520 getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
521 !(AllNative || useNativeFunc(FInfo.getName()))) {
522 return false;
523 }
524
525 if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
526 return sincosUseNative(aCI, FInfo);
527
529 FunctionCallee F = getFunction(aCI->getModule(), FInfo);
530 if (!F)
531 return false;
532
533 aCI->setCalledFunction(F);
534 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
535 << " with native version");
536 return true;
537}
538
539// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
540// builtin, with appended type size and alignment arguments, where 2 or 4
541// indicates the original number of arguments. The library has optimized version
542// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
543// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
544// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
545// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
546bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
547 const FuncInfo &FInfo) {
548 auto *Callee = CI->getCalledFunction();
549 if (!Callee->isDeclaration())
550 return false;
551
552 assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
553 auto *M = Callee->getParent();
554 std::string Name = std::string(Callee->getName());
555 auto NumArg = CI->arg_size();
556 if (NumArg != 4 && NumArg != 6)
557 return false;
558 ConstantInt *PacketSize =
559 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2));
560 ConstantInt *PacketAlign =
561 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1));
562 if (!PacketSize || !PacketAlign)
563 return false;
564
565 unsigned Size = PacketSize->getZExtValue();
566 Align Alignment = PacketAlign->getAlignValue();
567 if (Alignment != Size)
568 return false;
569
570 unsigned PtrArgLoc = CI->arg_size() - 3;
571 Value *PtrArg = CI->getArgOperand(PtrArgLoc);
572 Type *PtrTy = PtrArg->getType();
573
575 for (unsigned I = 0; I != PtrArgLoc; ++I)
576 ArgTys.push_back(CI->getArgOperand(I)->getType());
577 ArgTys.push_back(PtrTy);
578
579 Name = Name + "_" + std::to_string(Size);
580 auto *FTy = FunctionType::get(Callee->getReturnType(),
581 ArrayRef<Type *>(ArgTys), false);
582 AMDGPULibFunc NewLibFunc(Name, FTy);
584 if (!F)
585 return false;
586
588 for (unsigned I = 0; I != PtrArgLoc; ++I)
589 Args.push_back(CI->getArgOperand(I));
590 Args.push_back(PtrArg);
591
592 auto *NCI = B.CreateCall(F, Args);
593 NCI->setAttributes(CI->getAttributes());
594 CI->replaceAllUsesWith(NCI);
595 CI->dropAllReferences();
596 CI->eraseFromParent();
597
598 return true;
599}
600
601// This function returns false if no change; return true otherwise.
603 Function *Callee = CI->getCalledFunction();
604 // Ignore indirect calls.
605 if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin())
606 return false;
607
608 FuncInfo FInfo;
609 if (!parseFunctionName(Callee->getName(), FInfo))
610 return false;
611
612 // Further check the number of arguments to see if they match.
613 // TODO: Check calling convention matches too
614 if (!FInfo.isCompatibleSignature(*Callee->getParent(), CI->getFunctionType()))
615 return false;
616
617 LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n');
618
619 if (TDOFold(CI, FInfo))
620 return true;
621
622 IRBuilder<> B(CI);
623 if (CI->isStrictFP())
624 B.setIsFPConstrained(true);
625
627 // Under unsafe-math, evaluate calls if possible.
628 // According to Brian Sumner, we can do this for all f32 function calls
629 // using host's double function calls.
630 if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo))
631 return true;
632
633 // Copy fast flags from the original call.
634 FastMathFlags FMF = FPOp->getFastMathFlags();
635 B.setFastMathFlags(FMF);
636
637 // Specialized optimizations for each function call.
638 //
639 // TODO: Handle native functions
640 switch (FInfo.getId()) {
642 if (FMF.none())
643 return false;
644 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp,
645 FMF.approxFunc());
647 if (FMF.none())
648 return false;
649 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2,
650 FMF.approxFunc());
652 if (FMF.none())
653 return false;
654 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log,
655 FMF.approxFunc());
657 if (FMF.none())
658 return false;
659 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2,
660 FMF.approxFunc());
662 if (FMF.none())
663 return false;
664 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10,
665 FMF.approxFunc());
667 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum,
668 true, true);
670 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum,
671 true, true);
673 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true,
674 true);
676 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd,
677 true, true);
679 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true,
680 true, true);
682 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign,
683 true, true, true);
685 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true,
686 true);
688 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true,
689 true);
691 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true,
692 true);
694 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true,
695 true);
697 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true,
698 true);
700 if (!shouldReplaceLibcallWithIntrinsic(CI, true, true))
701 return false;
702
703 Value *Arg1 = CI->getArgOperand(1);
704 if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType());
705 VecTy && !isa<VectorType>(Arg1->getType())) {
706 Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
707 CI->setArgOperand(1, SplatArg1);
708 }
709
711 CI->getModule(), Intrinsic::ldexp,
712 {CI->getType(), CI->getArgOperand(1)->getType()}));
713 return true;
714 }
717 return tryOptimizePow(FPOp, B, FInfo);
720 if (fold_pow(FPOp, B, FInfo))
721 return true;
722 if (!FMF.approxFunc())
723 return false;
724
725 if (FInfo.getId() == AMDGPULibFunc::EI_POWR && FMF.approxFunc() &&
726 getArgType(FInfo) == AMDGPULibFunc::F32) {
727 Module *M = Callee->getParent();
728 AMDGPULibFunc PowrFastInfo(AMDGPULibFunc::EI_POWR_FAST, FInfo);
729 if (FunctionCallee PowrFastFunc = getFunction(M, PowrFastInfo)) {
730 CI->setCalledFunction(PowrFastFunc);
731 return true;
732 }
733 }
734
735 if (!shouldReplaceLibcallWithIntrinsic(CI))
736 return false;
737 return expandFastPow(FPOp, B, PowKind::PowR);
738 }
741 if (fold_pow(FPOp, B, FInfo))
742 return true;
743 if (!FMF.approxFunc())
744 return false;
745
746 if (FInfo.getId() == AMDGPULibFunc::EI_POWN &&
747 getArgType(FInfo) == AMDGPULibFunc::F32) {
748 Module *M = Callee->getParent();
749 AMDGPULibFunc PownFastInfo(AMDGPULibFunc::EI_POWN_FAST, FInfo);
750 if (FunctionCallee PownFastFunc = getFunction(M, PownFastInfo)) {
751 CI->setCalledFunction(PownFastFunc);
752 return true;
753 }
754 }
755
756 if (!shouldReplaceLibcallWithIntrinsic(CI))
757 return false;
758 return expandFastPow(FPOp, B, PowKind::PowN);
759 }
762 if (fold_rootn(FPOp, B, FInfo))
763 return true;
764 if (!FMF.approxFunc())
765 return false;
766
767 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
768 Module *M = Callee->getParent();
769 AMDGPULibFunc RootnFastInfo(AMDGPULibFunc::EI_ROOTN_FAST, FInfo);
770 if (FunctionCallee RootnFastFunc = getFunction(M, RootnFastInfo)) {
771 CI->setCalledFunction(RootnFastFunc);
772 return true;
773 }
774 }
775
776 return expandFastPow(FPOp, B, PowKind::RootN);
777 }
779 // TODO: Allow with strictfp + constrained intrinsic
780 return tryReplaceLibcallWithSimpleIntrinsic(
781 B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
784 return fold_sincos(FPOp, B, FInfo);
785 default:
786 break;
787 }
788 } else {
789 // Specialized optimizations for each function call
790 switch (FInfo.getId()) {
795 return fold_read_write_pipe(CI, B, FInfo);
796 default:
797 break;
798 }
799 }
800
801 return false;
802}
803
805 const Type *Ty) {
806 Type *ElemTy = Ty->getScalarType();
807 const fltSemantics &FltSem = ElemTy->getFltSemantics();
808
809 SmallVector<Constant *, 4> ConstValues;
810 ConstValues.reserve(Values.size());
811 for (APFloat APF : Values) {
812 bool Unused;
813 APF.convert(FltSem, APFloat::rmNearestTiesToEven, &Unused);
814 ConstValues.push_back(ConstantFP::get(ElemTy, APF));
815 }
816 return ConstantVector::get(ConstValues);
817}
818
819bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
820 // Table-Driven optimization
821 const TableRef tr = getOptTable(FInfo.getId());
822 if (tr.empty())
823 return false;
824
825 int const sz = (int)tr.size();
826 Value *opr0 = CI->getArgOperand(0);
827
828 int vecSize = getVecSize(FInfo);
829 if (vecSize > 1) {
830 // Vector version
831 Constant *CV = dyn_cast<Constant>(opr0);
832 if (CV && CV->getType()->isVectorTy()) {
834 Values.reserve(vecSize);
835 for (int eltNo = 0; eltNo < vecSize; ++eltNo) {
836 ConstantFP *eltval =
837 cast<ConstantFP>(CV->getAggregateElement((unsigned)eltNo));
838 auto MatchingRow = llvm::find_if(tr, [eltval](const TableEntry &entry) {
839 return eltval->isExactlyValue(entry.input);
840 });
841 if (MatchingRow == tr.end())
842 return false;
843 Values.push_back(APFloat(MatchingRow->result));
844 }
845 Constant *NewValues = getConstantFloatVector(Values, CI->getType());
846 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *NewValues << "\n");
847 replaceCall(CI, NewValues);
848 return true;
849 }
850 } else {
851 // Scalar version
852 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
853 for (int i = 0; i < sz; ++i) {
854 if (CF->isExactlyValue(tr[i].input)) {
855 Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
856 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
857 replaceCall(CI, nval);
858 return true;
859 }
860 }
861 }
862 }
863
864 return false;
865}
866
namespace llvm {
// Host-side log2 used during constant folding. ::log2 is only guaranteed when
// the platform advertises C99/XOPEN-600/POSIX-2001 math support; otherwise
// compute it as ln(V)/ln(2).
static double log2(double V) {
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
  return ::log2(V);
#else
  return log(V) / numbers::ln2;
#endif
}
} // namespace llvm
876
877bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
878 const FuncInfo &FInfo) {
879 assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
880 FInfo.getId() == AMDGPULibFunc::EI_POW_FAST ||
881 FInfo.getId() == AMDGPULibFunc::EI_POWR ||
882 FInfo.getId() == AMDGPULibFunc::EI_POWR_FAST ||
883 FInfo.getId() == AMDGPULibFunc::EI_POWN ||
884 FInfo.getId() == AMDGPULibFunc::EI_POWN_FAST) &&
885 "fold_pow: encounter a wrong function call");
886
887 Module *M = B.GetInsertBlock()->getModule();
888 Type *eltType = FPOp->getType()->getScalarType();
889 Value *opr0 = FPOp->getOperand(0);
890 Value *opr1 = FPOp->getOperand(1);
891
892 const APFloat *CF = nullptr;
893 const APInt *CINT = nullptr;
894 if (!match(opr1, m_APFloatAllowPoison(CF)))
895 match(opr1, m_APIntAllowPoison(CINT));
896
897 // 0x1111111 means that we don't do anything for this call.
898 int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
899
900 if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) {
901 // pow/powr/pown(x, 0) == 1
902 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n");
903 Constant *cnval = ConstantFP::get(eltType, 1.0);
904 if (getVecSize(FInfo) > 1) {
905 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
906 }
907 replaceCall(FPOp, cnval);
908 return true;
909 }
910 if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
911 // pow/powr/pown(x, 1.0) = x
912 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
913 replaceCall(FPOp, opr0);
914 return true;
915 }
916 if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
917 // pow/powr/pown(x, 2.0) = x*x
918 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * "
919 << *opr0 << "\n");
920 Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
921 replaceCall(FPOp, nval);
922 return true;
923 }
924 if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
925 // pow/powr/pown(x, -1.0) = 1.0/x
926 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n");
927 Constant *cnval = ConstantFP::get(eltType, 1.0);
928 if (getVecSize(FInfo) > 1) {
929 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
930 }
931 Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
932 replaceCall(FPOp, nval);
933 return true;
934 }
935
936 if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
937 // pow[r](x, [-]0.5) = sqrt(x)
938 bool issqrt = CF->isExactlyValue(0.5);
939 if (FunctionCallee FPExpr =
940 getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
942 FInfo))) {
943 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName()
944 << '(' << *opr0 << ")\n");
945 Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
946 : "__pow2rsqrt");
947 replaceCall(FPOp, nval);
948 return true;
949 }
950 }
951
952 if (!isUnsafeFiniteOnlyMath(FPOp))
953 return false;
954
955 // Unsafe Math optimization
956
957 // Remember that ci_opr1 is set if opr1 is integral
958 if (CF) {
959 double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
960 ? (double)CF->convertToFloat()
961 : CF->convertToDouble();
962 int ival = (int)dval;
963 if ((double)ival == dval) {
964 ci_opr1 = ival;
965 } else
966 ci_opr1 = 0x11111111;
967 }
968
969 // pow/powr/pown(x, c) = [1/](x*x*..x); where
970 // trunc(c) == c && the number of x == c && |c| <= 12
971 unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
972 if (abs_opr1 <= 12) {
973 Constant *cnval;
974 Value *nval;
975 if (abs_opr1 == 0) {
976 cnval = ConstantFP::get(eltType, 1.0);
977 if (getVecSize(FInfo) > 1) {
978 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
979 }
980 nval = cnval;
981 } else {
982 Value *valx2 = nullptr;
983 nval = nullptr;
984 while (abs_opr1 > 0) {
985 valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
986 if (abs_opr1 & 1) {
987 nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
988 }
989 abs_opr1 >>= 1;
990 }
991 }
992
993 if (ci_opr1 < 0) {
994 cnval = ConstantFP::get(eltType, 1.0);
995 if (getVecSize(FInfo) > 1) {
996 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
997 }
998 nval = B.CreateFDiv(cnval, nval, "__1powprod");
999 }
1000 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
1001 << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
1002 << ")\n");
1003 replaceCall(FPOp, nval);
1004 return true;
1005 }
1006
1007 // If we should use the generic intrinsic instead of emitting a libcall
1008 const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy();
1009
1010 // powr ---> exp2(y * log2(x))
1011 // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
1012 FunctionCallee ExpExpr;
1013 if (ShouldUseIntrinsic)
1014 ExpExpr = Intrinsic::getOrInsertDeclaration(M, Intrinsic::exp2,
1015 {FPOp->getType()});
1016 else {
1017 ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
1018 if (!ExpExpr)
1019 return false;
1020 }
1021
1022 bool needlog = false;
1023 bool needabs = false;
1024 bool needcopysign = false;
1025 Constant *cnval = nullptr;
1026 if (getVecSize(FInfo) == 1) {
1027 CF = nullptr;
1028 match(opr0, m_APFloatAllowPoison(CF));
1029
1030 if (CF) {
1031 double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
1032 ? (double)CF->convertToFloat()
1033 : CF->convertToDouble();
1034
1035 V = log2(std::abs(V));
1036 cnval = ConstantFP::get(eltType, V);
1037 needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1038 FInfo.getId() != AMDGPULibFunc::EI_POWR_FAST) &&
1039 CF->isNegative();
1040 } else {
1041 needlog = true;
1042 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1043 FInfo.getId() != AMDGPULibFunc::EI_POWR_FAST;
1044 }
1045 } else {
1046 ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
1047
1048 if (!CDV) {
1049 needlog = true;
1050 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1051 FInfo.getId() != AMDGPULibFunc::EI_POWR_FAST;
1052 } else {
1053 assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
1054 "Wrong vector size detected");
1055
1057 for (int i=0; i < getVecSize(FInfo); ++i) {
1058 double V = CDV->getElementAsAPFloat(i).convertToDouble();
1059 if (V < 0.0) needcopysign = true;
1060 V = log2(std::abs(V));
1061 DVal.push_back(V);
1062 }
1063 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1065 for (double D : DVal)
1066 FVal.push_back((float)D);
1067 ArrayRef<float> tmp(FVal);
1068 cnval = ConstantDataVector::get(M->getContext(), tmp);
1069 } else {
1070 ArrayRef<double> tmp(DVal);
1071 cnval = ConstantDataVector::get(M->getContext(), tmp);
1072 }
1073 }
1074 }
1075
1076 if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW ||
1077 FInfo.getId() == AMDGPULibFunc::EI_POW_FAST)) {
1078 // We cannot handle corner cases for a general pow() function, give up
1079 // unless y is a constant integral value. Then proceed as if it were pown.
1080 if (!isKnownIntegral(opr1, SQ.getWithInstruction(cast<Instruction>(FPOp)),
1081 FPOp->getFastMathFlags()))
1082 return false;
1083 }
1084
1085 Value *nval;
1086 if (needabs) {
1087 nval = B.CreateFAbs(opr0, nullptr, "__fabs");
1088 } else {
1089 nval = cnval ? cnval : opr0;
1090 }
1091 if (needlog) {
1092 FunctionCallee LogExpr;
1093 if (ShouldUseIntrinsic) {
1094 LogExpr = Intrinsic::getOrInsertDeclaration(M, Intrinsic::log2,
1095 {FPOp->getType()});
1096 } else {
1097 LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
1098 if (!LogExpr)
1099 return false;
1100 }
1101
1102 nval = CreateCallEx(B,LogExpr, nval, "__log2");
1103 }
1104
1105 if (FInfo.getId() == AMDGPULibFunc::EI_POWN ||
1106 FInfo.getId() == AMDGPULibFunc::EI_POWN_FAST) {
1107 // convert int(32) to fp(f32 or f64)
1108 opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
1109 }
1110 nval = B.CreateFMul(opr1, nval, "__ylogx");
1111
1112 CallInst *Exp2Call = CreateCallEx(B, ExpExpr, nval, "__exp2");
1113
1114 // TODO: Generalized fpclass logic for pow
1116 if (FPOp->hasNoNaNs())
1117 KnownNot |= FPClassTest::fcNan;
1118
1119 Exp2Call->addRetAttr(
1120 Attribute::getWithNoFPClass(Exp2Call->getContext(), KnownNot));
1121 nval = Exp2Call;
1122
1123 if (needcopysign) {
1124 Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
1125 Type *nTy = FPOp->getType()->getWithNewType(nTyS);
1126 Value *opr_n = FPOp->getOperand(1);
1127 if (opr_n->getType()->getScalarType()->isIntegerTy())
1128 opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
1129 else
1130 opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
1131
1132 unsigned size = nTy->getScalarSizeInBits();
1133 Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
1134 sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1135
1136 nval = B.CreateCopySign(nval, B.CreateBitCast(sign, nval->getType()),
1137 nullptr, "__pow_sign");
1138 }
1139
1140 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
1141 << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
1142 replaceCall(FPOp, nval);
1143
1144 return true;
1145}
1146
1147bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
1148 const FuncInfo &FInfo) {
1149 Value *opr0 = FPOp->getOperand(0);
1150 Value *opr1 = FPOp->getOperand(1);
1151
1152 const APInt *CINT = nullptr;
1153 if (!match(opr1, m_APIntAllowPoison(CINT)))
1154 return false;
1155
1156 Function *Parent = B.GetInsertBlock()->getParent();
1157
1158 int ci_opr1 = (int)CINT->getSExtValue();
1159 if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
1160 // rootn(x, 1) = x
1161 //
1162 // TODO: Insert constrained canonicalize for strictfp case.
1163 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
1164 replaceCall(FPOp, opr0);
1165 return true;
1166 }
1167
1168 Module *M = B.GetInsertBlock()->getModule();
1169
1170 CallInst *CI = cast<CallInst>(FPOp);
1171 if (ci_opr1 == 2 &&
1172 shouldReplaceLibcallWithIntrinsic(CI,
1173 /*AllowMinSizeF32=*/true,
1174 /*AllowF64=*/true)) {
1175 // rootn(x, 2) = sqrt(x)
1176 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 << ")\n");
1177
1178 CallInst *NewCall = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1179 NewCall->takeName(CI);
1180
1181 // OpenCL rootn has a looser ulp of 2 requirement than sqrt, so add some
1182 // metadata.
1183 MDBuilder MDHelper(M->getContext());
1184 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
1185 NewCall->setMetadata(LLVMContext::MD_fpmath, FPMD);
1186
1187 replaceCall(CI, NewCall);
1188 return true;
1189 }
1190
1191 if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
1192 if (FunctionCallee FPExpr =
1193 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1194 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0
1195 << ")\n");
1196 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1197 replaceCall(FPOp, nval);
1198 return true;
1199 }
1200 } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1201 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n");
1202 Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1203 opr0,
1204 "__rootn2div");
1205 replaceCall(FPOp, nval);
1206 return true;
1207 }
1208
1209 if (ci_opr1 == -2 &&
1210 shouldReplaceLibcallWithIntrinsic(CI,
1211 /*AllowMinSizeF32=*/true,
1212 /*AllowF64=*/true)) {
1213 // rootn(x, -2) = rsqrt(x)
1214
1215 // The original rootn had looser ulp requirements than the resultant sqrt
1216 // and fdiv.
1217 MDBuilder MDHelper(M->getContext());
1218 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
1219
1220 // TODO: Could handle strictfp but need to fix strict sqrt emission
1221 FastMathFlags FMF = FPOp->getFastMathFlags();
1222 FMF.setAllowContract(true);
1223
1224 CallInst *Sqrt = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1226 B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), Sqrt));
1227 Sqrt->setFastMathFlags(FMF);
1228 RSqrt->setFastMathFlags(FMF);
1229 RSqrt->setMetadata(LLVMContext::MD_fpmath, FPMD);
1230
1231 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0
1232 << ")\n");
1233 replaceCall(CI, RSqrt);
1234 return true;
1235 }
1236
1237 return false;
1238}
1239
1240// is_integer(y) => trunc(y) == y
1242 Value *TruncY = B.CreateUnaryIntrinsic(Intrinsic::trunc, Y);
1243 return B.CreateFCmpOEQ(TruncY, Y);
1244}
1245
1247 // Even integers are still integers after division by 2.
1248 auto *HalfY = B.CreateFMul(Y, ConstantFP::get(Y->getType(), 0.5));
1249 return emitIsInteger(B, HalfY);
1250}
1251
1252// is_odd_integer(y) => is_integer(y) && !is_even_integer(y)
1254 Value *IsIntY = emitIsInteger(B, Y);
1255 Value *IsEvenY = emitIsEvenInteger(B, Y);
1256 Value *NotEvenY = B.CreateNot(IsEvenY);
1257 return B.CreateAnd(IsIntY, NotEvenY);
1258}
1259
1260// isinf(val) => fabs(val) == +inf
1262 auto *fabsVal = B.CreateFAbs(val);
1263 return B.CreateFCmpOEQ(fabsVal, ConstantFP::getInfinity(val->getType()));
1264}
1265
1266// y * log2(fabs(x))
1268 Value *AbsX = B.CreateFAbs(X);
1269 Value *LogAbsX = B.CreateUnaryIntrinsic(Intrinsic::log2, AbsX);
1270 Value *YTimesLogX = B.CreateFMul(Y, LogAbsX);
1271 return B.CreateUnaryIntrinsic(Intrinsic::exp2, YTimesLogX);
1272}
1273
1274/// Emit special case management epilog code for fast pow, powr, pown, and rootn
1275/// expansions. \p x and \p y should be the arguments to the library call
1276/// (possibly with some values clamped). \p expylnx should be the result to use
1277/// in normal circumstances.
1279 PowKind Kind) {
1280 Constant *Zero = ConstantFP::getZero(X->getType());
1281 Constant *One = ConstantFP::get(X->getType(), 1.0);
1282 Constant *QNaN = ConstantFP::getQNaN(X->getType());
1283 Constant *PInf = ConstantFP::getInfinity(X->getType());
1284
1285 switch (Kind) {
1286 case PowKind::Pow: {
1287 // is_odd_integer(y)
1288 Value *IsOddY = emitIsOddInteger(B, Y);
1289
1290 // ret = copysign(expylnx, is_odd_y ? x : 1.0f)
1291 Value *SelSign = B.CreateSelect(IsOddY, X, One);
1292 Value *Ret = B.CreateCopySign(ExpYLnX, SelSign);
1293
1294 // if (x < 0 && !is_integer(y)) ret = QNAN
1295 Value *IsIntY = emitIsInteger(B, Y);
1296 Value *condNegX = B.CreateFCmpOLT(X, Zero);
1297 Value *condNotIntY = B.CreateNot(IsIntY);
1298 Value *condNaN = B.CreateAnd(condNegX, condNotIntY);
1299 Ret = B.CreateSelect(condNaN, QNaN, Ret);
1300
1301 // if (isinf(ay)) { ... }
1302
1303 // FIXME: Missing backend optimization to save on materialization cost of
1304 // mixed sign constant infinities.
1305 Value *YIsInf = emitIsInf(B, Y);
1306
1307 Value *AY = B.CreateFAbs(Y);
1308 Value *YIsNegInf = B.CreateFCmpUNE(Y, AY);
1309
1310 Value *AX = B.CreateFAbs(X);
1311 Value *AxEqOne = B.CreateFCmpOEQ(AX, One);
1312 Value *AxLtOne = B.CreateFCmpOLT(AX, One);
1313 Value *XorCond = B.CreateXor(AxLtOne, YIsNegInf);
1314 Value *SelInf =
1315 B.CreateSelect(AxEqOne, AX, B.CreateSelect(XorCond, Zero, AY));
1316 Ret = B.CreateSelect(YIsInf, SelInf, Ret);
1317
1318 // if (isinf(ax) || x == 0.0f) { ... }
1319 Value *XIsInf = emitIsInf(B, X);
1320 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1321 Value *AxInfOrZero = B.CreateOr(XIsInf, XEqZero);
1322 Value *YLtZero = B.CreateFCmpOLT(Y, Zero);
1323 Value *XorZeroInf = B.CreateXor(XEqZero, YLtZero);
1324 Value *SelVal = B.CreateSelect(XorZeroInf, Zero, PInf);
1325 Value *SelSign2 = B.CreateSelect(IsOddY, X, Zero);
1326 Value *Copysign = B.CreateCopySign(SelVal, SelSign2);
1327 Ret = B.CreateSelect(AxInfOrZero, Copysign, Ret);
1328
1329 // if (isunordered(x, y)) ret = QNAN
1330 Value *isUnordered = B.CreateFCmpUNO(X, Y);
1331 return B.CreateSelect(isUnordered, QNaN, Ret);
1332 }
1333 case PowKind::PowR: {
1334 Value *YIsNeg = B.CreateFCmpOLT(Y, Zero);
1335 Value *IZ = B.CreateSelect(YIsNeg, PInf, Zero);
1336 Value *ZI = B.CreateSelect(YIsNeg, Zero, PInf);
1337
1338 Value *YEqZero = B.CreateFCmpOEQ(Y, Zero);
1339 Value *SelZeroCase = B.CreateSelect(YEqZero, QNaN, IZ);
1340 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1341 Value *Ret = B.CreateSelect(XEqZero, SelZeroCase, ExpYLnX);
1342
1343 Value *XEqInf = B.CreateFCmpOEQ(X, PInf);
1344 Value *YNeZero = B.CreateFCmpUNE(Y, Zero);
1345 Value *CondInfCase = B.CreateAnd(XEqInf, YNeZero);
1346 Ret = B.CreateSelect(CondInfCase, ZI, Ret);
1347
1348 Value *IsInfY = emitIsInf(B, Y);
1349 Value *XNeOne = B.CreateFCmpUNE(X, One);
1350 Value *CondInfY = B.CreateAnd(IsInfY, XNeOne);
1351 Value *XLtOne = B.CreateFCmpOLT(X, One);
1352 Value *SelInfYCase = B.CreateSelect(XLtOne, IZ, ZI);
1353 Ret = B.CreateSelect(CondInfY, SelInfYCase, Ret);
1354
1355 Value *IsUnordered = B.CreateFCmpUNO(X, Y);
1356 return B.CreateSelect(IsUnordered, QNaN, Ret);
1357 }
1358 case PowKind::PowN: {
1359 Constant *ZeroI = ConstantInt::get(Y->getType(), 0);
1360
1361 // is_odd_y = (ny & 1) != 0
1362 Value *OneI = ConstantInt::get(Y->getType(), 1);
1363 Value *YAnd1 = B.CreateAnd(Y, OneI);
1364 Value *IsOddY = B.CreateICmpNE(YAnd1, ZeroI);
1365
1366 // ret = copysign(expylnx, is_odd_y ? x : 1.0f)
1367 Value *SelSign = B.CreateSelect(IsOddY, X, One);
1368 Value *Ret = B.CreateCopySign(ExpYLnX, SelSign);
1369
1370 // if (isinf(x) || x == 0.0f)
1371 Value *FabsX = B.CreateFAbs(X);
1372 Value *XIsInf = B.CreateFCmpOEQ(FabsX, PInf);
1373 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1374 Value *InfOrZero = B.CreateOr(XIsInf, XEqZero);
1375
1376 // (x == 0.0f) ^ (ny < 0) ? 0.0f : +inf
1377 Value *YLtZero = B.CreateICmpSLT(Y, ZeroI);
1378 Value *XorZeroInf = B.CreateXor(XEqZero, YLtZero);
1379 Value *SelVal = B.CreateSelect(XorZeroInf, Zero, PInf);
1380
1381 // copysign(selVal, is_odd_y ? x : 0.0f)
1382 Value *SelSign2 = B.CreateSelect(IsOddY, X, Zero);
1383 Value *Copysign = B.CreateCopySign(SelVal, SelSign2);
1384
1385 return B.CreateSelect(InfOrZero, Copysign, Ret);
1386 }
1387 case PowKind::RootN: {
1388 Constant *ZeroI = ConstantInt::get(Y->getType(), 0);
1389
1390 // is_odd_y = (ny & 1) != 0
1391 Value *YAnd1 = B.CreateAnd(Y, ConstantInt::get(Y->getType(), 1));
1392 Value *IsOddY = B.CreateICmpNE(YAnd1, ZeroI);
1393
1394 // ret = copysign(expylnx, is_odd_y ? x : 1.0f)
1395 Value *SelSign = B.CreateSelect(IsOddY, X, One);
1396 Value *Ret = B.CreateCopySign(ExpYLnX, SelSign);
1397
1398 // if (isinf(x) || x == 0.0f)
1399 Value *FabsX = B.CreateFAbs(X);
1400 Value *IsInfX = B.CreateFCmpOEQ(FabsX, PInf);
1401 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1402 Value *CondInfOrZero = B.CreateOr(IsInfX, XEqZero);
1403
1404 // (x == 0.0f) ^ (ny < 0) ? 0.0f : +inf
1405 Value *YLtZero = B.CreateICmpSLT(Y, ZeroI);
1406 Value *XorZeroInf = B.CreateXor(XEqZero, YLtZero);
1407 Value *SelVal = B.CreateSelect(XorZeroInf, Zero, PInf);
1408
1409 // copysign(selVal, is_odd_y ? x : 0.0f)
1410 Value *SelSign2 = B.CreateSelect(IsOddY, X, Zero);
1411 Value *Copysign = B.CreateCopySign(SelVal, SelSign2);
1412
1413 Ret = B.CreateSelect(CondInfOrZero, Copysign, Ret);
1414
1415 // if ((x < 0.0f && !is_odd_y) || ny == 0) ret = QNAN
1416 Value *XIsNeg = B.CreateFCmpOLT(X, Zero);
1417 Value *NotOddY = B.CreateNot(IsOddY);
1418 Value *CondNegAndNotOdd = B.CreateAnd(XIsNeg, NotOddY);
1419 Value *YEqZero = B.CreateICmpEQ(Y, ZeroI);
1420 Value *CondBad = B.CreateOr(CondNegAndNotOdd, YEqZero);
1421 return B.CreateSelect(CondBad, QNaN, Ret);
1422 }
1423 }
1424
1425 llvm_unreachable("covered switch");
1426}
1427
1428// TODO: Move the fold_pow folding to sqrt/fdiv here
1429bool AMDGPULibCalls::expandFastPow(FPMathOperator *FPOp, IRBuilder<> &B,
1430 PowKind Kind) {
1431 Type *Ty = FPOp->getType();
1432
1433 // There's currently no reason to do this for half. The correct path is
1434 // promote to float and use the fast float expansion.
1435 //
1436 // TODO: We could move this expansion to lowering to get half pow to work.
1437 if (!Ty->getScalarType()->isFloatTy())
1438 return false;
1439
1440 // TODO: Verify optimization for double and bfloat.
1441 Value *X = FPOp->getOperand(0);
1442 Value *Y = FPOp->getOperand(1);
1443
1444 switch (Kind) {
1445 case PowKind::Pow: {
1446 Constant *One = ConstantFP::get(X->getType(), 1.0);
1447
1448 // if (x == 1.0f) y = 1.0f;
1449 Value *XEqOne = B.CreateFCmpOEQ(X, One);
1450 Y = B.CreateSelect(XEqOne, One, Y);
1451
1452 // if (y == 0.0f) x = 1.0f;
1453 Value *YEqZero = B.CreateFCmpOEQ(Y, ConstantFP::getZero(X->getType()));
1454 X = B.CreateSelect(YEqZero, One, X);
1455
1456 Value *ExpYLnX = emitFastExpYLnx(B, X, Y);
1457 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1458 replaceCall(FPOp, Fixed);
1459 return true;
1460 }
1461 case PowKind::PowR: {
1462 Value *NegX = B.CreateFCmpOLT(X, ConstantFP::getZero(X->getType()));
1463 X = B.CreateSelect(NegX, ConstantFP::getQNaN(X->getType()), X);
1464
1465 Value *ExpYLnX = emitFastExpYLnx(B, X, Y);
1466 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1467 replaceCall(FPOp, Fixed);
1468 return true;
1469 }
1470 case PowKind::PowN: {
1471 // ny == 0
1472 Value *YEqZero = B.CreateICmpEQ(Y, ConstantInt::get(Y->getType(), 0));
1473
1474 // x = (ny == 0 ? 1.0f : x)
1475 X = B.CreateSelect(YEqZero, ConstantFP::get(X->getType(), 1.0), X);
1476
1477 Value *CastY = B.CreateSIToFP(Y, X->getType());
1478 Value *ExpYLnX = emitFastExpYLnx(B, X, CastY);
1479 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1480 replaceCall(FPOp, Fixed);
1481 return true;
1482 }
1483 case PowKind::RootN: {
1484 Value *CastY = B.CreateSIToFP(Y, X->getType());
1485
1486 // This is afn anyway, so we will turn into rcp.
1487 Value *RcpY = B.CreateFDiv(ConstantFP::get(X->getType(), 1.0), CastY);
1488
1489 Value *ExpYLnX = emitFastExpYLnx(B, X, RcpY);
1490 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1491 replaceCall(FPOp, Fixed);
1492 return true;
1493 }
1494 }
1495 llvm_unreachable("Unhandled PowKind enum");
1496}
1497
1498bool AMDGPULibCalls::tryOptimizePow(FPMathOperator *FPOp, IRBuilder<> &B,
1499 const FuncInfo &FInfo) {
1500 FastMathFlags FMF = FPOp->getFastMathFlags();
1501 CallInst *Call = cast<CallInst>(FPOp);
1502 Module *M = Call->getModule();
1503
1504 FuncInfo PowrInfo;
1505 AMDGPULibFunc::EFuncId FastPowrFuncId =
1506 FMF.approxFunc() || FInfo.getId() == AMDGPULibFunc::EI_POW_FAST
1509 FunctionCallee PowrFunc = getFloatFastVariant(
1510 M, FInfo, PowrInfo, AMDGPULibFunc::EI_POWR, FastPowrFuncId);
1511
1512 // TODO: Prefer fast pown to fast powr, but slow powr to slow pown.
1513
1514 // pow(x, y) -> powr(x, y) for x >= -0.0
1515 // TODO: Account for flags on current call
1516 if (PowrFunc && cannotBeOrderedLessThanZero(FPOp->getOperand(0),
1517 SQ.getWithInstruction(Call))) {
1518 Call->setCalledFunction(PowrFunc);
1519 return fold_pow(FPOp, B, PowrInfo) || true;
1520 }
1521
1522 // pow(x, y) -> pown(x, y) for known integral y
1523 if (isKnownIntegral(FPOp->getOperand(1), SQ.getWithInstruction(Call),
1524 FPOp->getFastMathFlags())) {
1525 FunctionType *PownType = getPownType(Call->getFunctionType());
1526
1527 FuncInfo PownInfo;
1528 AMDGPULibFunc::EFuncId FastPownFuncId =
1529 FMF.approxFunc() || FInfo.getId() == AMDGPULibFunc::EI_POW_FAST
1532 FunctionCallee PownFunc = getFloatFastVariant(
1533 M, FInfo, PownInfo, AMDGPULibFunc::EI_POWN, FastPownFuncId);
1534
1535 if (PownFunc) {
1536 // TODO: If the incoming integral value is an sitofp/uitofp, it won't
1537 // fold out without a known range. We can probably take the source
1538 // value directly.
1539 Value *CastedArg =
1540 B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1));
1541 // Have to drop any nofpclass attributes on the original call site.
1543 1, AttributeFuncs::typeIncompatible(CastedArg->getType(),
1545 Call->setCalledFunction(PownFunc);
1546 Call->setArgOperand(1, CastedArg);
1547 return fold_pow(FPOp, B, PownInfo) || true;
1548 }
1549 }
1550
1551 if (fold_pow(FPOp, B, FInfo))
1552 return true;
1553
1554 if (!FMF.approxFunc())
1555 return false;
1556
1557 if (FInfo.getId() == AMDGPULibFunc::EI_POW && FMF.approxFunc() &&
1558 getArgType(FInfo) == AMDGPULibFunc::F32) {
1559 AMDGPULibFunc PowFastInfo(AMDGPULibFunc::EI_POW_FAST, FInfo);
1560 if (FunctionCallee PowFastFunc = getFunction(M, PowFastInfo)) {
1561 Call->setCalledFunction(PowFastFunc);
1562 return fold_pow(FPOp, B, PowFastInfo) || true;
1563 }
1564 }
1565
1566 return expandFastPow(FPOp, B, PowKind::Pow);
1567}
1568
1569// Get a scalar native builtin single argument FP function
1570FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
1571 const FuncInfo &FInfo) {
1572 if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1573 return nullptr;
1574 FuncInfo nf = FInfo;
1576 return getFunction(M, nf);
1577}
1578
1579// Some library calls are just wrappers around llvm intrinsics, but compiled
1580// conservatively. Preserve the flags from the original call site by
1581// substituting them with direct calls with all the flags.
1582bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
1583 bool AllowMinSizeF32,
1584 bool AllowF64,
1585 bool AllowStrictFP) {
1586 Type *FltTy = CI->getType()->getScalarType();
1587 const bool IsF32 = FltTy->isFloatTy();
1588
1589 // f64 intrinsics aren't implemented for most operations.
1590 if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy()))
1591 return false;
1592
1593 // We're implicitly inlining by replacing the libcall with the intrinsic, so
1594 // don't do it for noinline call sites.
1595 if (CI->isNoInline())
1596 return false;
1597
1598 const Function *ParentF = CI->getFunction();
1599 // TODO: Handle strictfp
1600 if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP))
1601 return false;
1602
1603 if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize())
1604 return false;
1605 return true;
1606}
1607
1608void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B,
1609 CallInst *CI,
1610 Intrinsic::ID IntrID) {
1611 if (CI->arg_size() == 2) {
1612 Value *Arg0 = CI->getArgOperand(0);
1613 Value *Arg1 = CI->getArgOperand(1);
1614 VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType());
1615 VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType());
1616 if (Arg0VecTy && !Arg1VecTy) {
1617 Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
1618 CI->setArgOperand(1, SplatRHS);
1619 } else if (!Arg0VecTy && Arg1VecTy) {
1620 Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
1621 CI->setArgOperand(0, SplatLHS);
1622 }
1623 }
1624
1626 CI->getModule(), IntrID, {CI->getType()}));
1628}
1629
1630bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
1631 IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32,
1632 bool AllowF64, bool AllowStrictFP) {
1633 if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
1634 AllowStrictFP))
1635 return false;
1636 replaceLibCallWithSimpleIntrinsic(B, CI, IntrID);
1637 return true;
1638}
1639
/// Insert a sincos libcall computing both sin and cos of \p Arg, and return
/// {sin value, cos value, the sincos call itself}. The sin value is the
/// call's return; the cos value is loaded back from the output pointer
/// argument.
std::tuple<Value *, Value *, Value *>
AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
                             FunctionCallee Fsincos) {
  DebugLoc DL = B.getCurrentDebugLocation();
  Function *F = B.GetInsertBlock()->getParent();
  // Place the alloca with the function's other allocas (entry block).
  B.SetInsertPointPastAllocas(F);

  AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_");

  if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
    // If the argument is an instruction, it must dominate all uses so put our
    // sincos call there. Otherwise, right after the allocas works well enough
    // if it's an argument or constant.

    B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());

    // SetInsertPoint unwelcomely always tries to set the debug loc.
    B.SetCurrentDebugLocation(DL);
  }

  Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1);

  // The allocaInst allocates the memory in private address space. This need
  // to be addrspacecasted to point to the address space of cos pointer type.
  // In OpenCL 2.0 this is generic, while in 1.2 that is private.
  Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy);

  CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc);

  // TODO: Is it worth trying to preserve the location for the cos calls for the
  // load?

  // Read back the cos result written through the output pointer.
  LoadInst *LoadCos = B.CreateLoad(Arg->getType(), Alloc);
  return {SinCos, LoadCos, SinCos};
}
1675
1676// fold sin, cos -> sincos.
1677bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
1678 const FuncInfo &fInfo) {
1679 assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1680 fInfo.getId() == AMDGPULibFunc::EI_COS);
1681
1682 if ((getArgType(fInfo) != AMDGPULibFunc::F32 &&
1683 getArgType(fInfo) != AMDGPULibFunc::F64) ||
1684 fInfo.getPrefix() != AMDGPULibFunc::NOPFX)
1685 return false;
1686
1687 bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1688
1689 Value *CArgVal = FPOp->getOperand(0);
1690
1691 // TODO: Constant fold the call
1692 if (isa<ConstantData>(CArgVal))
1693 return false;
1694
1695 CallInst *CI = cast<CallInst>(FPOp);
1696
1697 Function *F = B.GetInsertBlock()->getParent();
1698 Module *M = F->getParent();
1699
1700 // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer
1701 // implementation. Prefer the private form if available.
1702 AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo);
1703 SinCosLibFuncPrivate.getLeads()[0].PtrKind =
1705
1706 AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo);
1707 SinCosLibFuncGeneric.getLeads()[0].PtrKind =
1709
1710 FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
1711 FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
1712 FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
1713 if (!FSinCos)
1714 return false;
1715
1716 SmallVector<CallInst *> SinCalls;
1717 SmallVector<CallInst *> CosCalls;
1718 SmallVector<CallInst *> SinCosCalls;
1719 FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN,
1720 fInfo);
1721 const std::string PairName = PartnerInfo.mangle();
1722
1723 StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName;
1724 StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName();
1725 const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
1726 const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
1727
1728 // Intersect the two sets of flags.
1729 FastMathFlags FMF = FPOp->getFastMathFlags();
1730 MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath);
1731
1732 SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()};
1733
1734 for (User* U : CArgVal->users()) {
1735 CallInst *XI = dyn_cast<CallInst>(U);
1736 if (!XI || XI->getFunction() != F || XI->isNoBuiltin())
1737 continue;
1738
1739 Function *UCallee = XI->getCalledFunction();
1740 if (!UCallee)
1741 continue;
1742
1743 bool Handled = true;
1744
1745 if (UCallee->getName() == SinName)
1746 SinCalls.push_back(XI);
1747 else if (UCallee->getName() == CosName)
1748 CosCalls.push_back(XI);
1749 else if (UCallee->getName() == SinCosPrivateName ||
1750 UCallee->getName() == SinCosGenericName)
1751 SinCosCalls.push_back(XI);
1752 else
1753 Handled = false;
1754
1755 if (Handled) {
1756 MergeDbgLocs.push_back(XI->getDebugLoc());
1757 auto *OtherOp = cast<FPMathOperator>(XI);
1758 FMF &= OtherOp->getFastMathFlags();
1760 FPMath, XI->getMetadata(LLVMContext::MD_fpmath));
1761 }
1762 }
1763
1764 if (SinCalls.empty() || CosCalls.empty())
1765 return false;
1766
1767 B.setFastMathFlags(FMF);
1768 B.setDefaultFPMathTag(FPMath);
1769 DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs);
1770 B.SetCurrentDebugLocation(DbgLoc);
1771
1772 auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos);
1773
1774 auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) {
1775 for (CallInst *C : Calls)
1776 C->replaceAllUsesWith(Res);
1777
1778 // Leave the other dead instructions to avoid clobbering iterators.
1779 };
1780
1781 replaceTrigInsts(SinCalls, Sin);
1782 replaceTrigInsts(CosCalls, Cos);
1783 replaceTrigInsts(SinCosCalls, SinCos);
1784
1785 // It's safe to delete the original now.
1786 CI->eraseFromParent();
1787 return true;
1788}
1789
1790bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
1791 APFloat &Res0, APFloat &Res1,
1792 Constant *copr0, Constant *copr1) {
1793 // By default, opr0/opr1/opr3 holds values of float/double type.
1794 // If they are not float/double, each function has to its
1795 // operand separately.
1796 double opr0 = 0.0, opr1 = 0.0;
1799 if (fpopr0) {
1800 opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1801 ? fpopr0->getValueAPF().convertToDouble()
1802 : (double)fpopr0->getValueAPF().convertToFloat();
1803 }
1804
1805 if (fpopr1) {
1806 opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1807 ? fpopr1->getValueAPF().convertToDouble()
1808 : (double)fpopr1->getValueAPF().convertToFloat();
1809 }
1810
1811 switch (FInfo.getId()) {
1812 default:
1813 return false;
1814
1816 Res0 = APFloat{acos(opr0)};
1817 return true;
1818
1820 // acosh(x) == log(x + sqrt(x*x - 1))
1821 Res0 = APFloat{log(opr0 + sqrt(opr0 * opr0 - 1.0))};
1822 return true;
1823
1825 Res0 = APFloat{acos(opr0) / MATH_PI};
1826 return true;
1827
1829 Res0 = APFloat{asin(opr0)};
1830 return true;
1831
1833 // asinh(x) == log(x + sqrt(x*x + 1))
1834 Res0 = APFloat{log(opr0 + sqrt(opr0 * opr0 + 1.0))};
1835 return true;
1836
1838 Res0 = APFloat{asin(opr0) / MATH_PI};
1839 return true;
1840
1842 Res0 = APFloat{atan(opr0)};
1843 return true;
1844
1846 // atanh(x) == (log(x+1) - log(x-1))/2;
1847 Res0 = APFloat{(log(opr0 + 1.0) - log(opr0 - 1.0)) / 2.0};
1848 return true;
1849
1851 Res0 = APFloat{atan(opr0) / MATH_PI};
1852 return true;
1853
1855 Res0 =
1856 APFloat{(opr0 < 0.0) ? -pow(-opr0, 1.0 / 3.0) : pow(opr0, 1.0 / 3.0)};
1857 return true;
1858
1860 Res0 = APFloat{cos(opr0)};
1861 return true;
1862
1864 Res0 = APFloat{cosh(opr0)};
1865 return true;
1866
1868 Res0 = APFloat{cos(MATH_PI * opr0)};
1869 return true;
1870
1872 Res0 = APFloat{exp(opr0)};
1873 return true;
1874
1876 Res0 = APFloat{pow(2.0, opr0)};
1877 return true;
1878
1880 Res0 = APFloat{pow(10.0, opr0)};
1881 return true;
1882
1884 Res0 = APFloat{log(opr0)};
1885 return true;
1886
1888 Res0 = APFloat{log(opr0) / log(2.0)};
1889 return true;
1890
1892 Res0 = APFloat{log(opr0) / log(10.0)};
1893 return true;
1894
1896 Res0 = APFloat{1.0 / sqrt(opr0)};
1897 return true;
1898
1900 Res0 = APFloat{sin(opr0)};
1901 return true;
1902
1904 Res0 = APFloat{sinh(opr0)};
1905 return true;
1906
1908 Res0 = APFloat{sin(MATH_PI * opr0)};
1909 return true;
1910
1912 Res0 = APFloat{tan(opr0)};
1913 return true;
1914
1916 Res0 = APFloat{tanh(opr0)};
1917 return true;
1918
1920 Res0 = APFloat{tan(MATH_PI * opr0)};
1921 return true;
1922
1923 // two-arg functions
1926 Res0 = APFloat{pow(opr0, opr1)};
1927 return true;
1928
1930 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1931 double val = (double)iopr1->getSExtValue();
1932 Res0 = APFloat{pow(opr0, val)};
1933 return true;
1934 }
1935 return false;
1936 }
1937
1939 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1940 double val = (double)iopr1->getSExtValue();
1941 Res0 = APFloat{pow(opr0, 1.0 / val)};
1942 return true;
1943 }
1944 return false;
1945 }
1946
1947 // with ptr arg
1949 Res0 = APFloat{sin(opr0)};
1950 Res1 = APFloat{cos(opr0)};
1951 return true;
1952 }
1953
1954 return false;
1955}
1956
1957bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1958 int numArgs = (int)aCI->arg_size();
1959 if (numArgs > 3)
1960 return false;
1961
1962 Constant *copr0 = nullptr;
1963 Constant *copr1 = nullptr;
1964 if (numArgs > 0) {
1965 if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1966 return false;
1967 }
1968
1969 if (numArgs > 1) {
1970 if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1971 if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1972 return false;
1973 }
1974 }
1975
1976 // At this point, all arguments to aCI are constants.
1977
1978 // max vector size is 16, and sincos will generate two results.
1979 SmallVector<APFloat, 16> Val0, Val1;
1980 int FuncVecSize = getVecSize(FInfo);
1981 bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
1982 if (FuncVecSize == 1) {
1983 if (!evaluateScalarMathFunc(FInfo, Val0.emplace_back(0.0),
1984 Val1.emplace_back(0.0), copr0, copr1)) {
1985 return false;
1986 }
1987 } else {
1988 ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1989 ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1990 for (int i = 0; i < FuncVecSize; ++i) {
1991 Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1992 Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1993 if (!evaluateScalarMathFunc(FInfo, Val0.emplace_back(0.0),
1994 Val1.emplace_back(0.0), celt0, celt1)) {
1995 return false;
1996 }
1997 }
1998 }
1999
2000 Constant *nval0, *nval1;
2001 if (FuncVecSize == 1) {
2002 nval0 = ConstantFP::get(aCI->getType(), Val0[0]);
2003 if (hasTwoResults)
2004 nval1 = ConstantFP::get(aCI->getType(), Val1[0]);
2005 } else {
2006 nval0 = getConstantFloatVector(Val0, aCI->getType());
2007 if (hasTwoResults)
2008 nval1 = getConstantFloatVector(Val1, aCI->getType());
2009 }
2010
2011 if (hasTwoResults) {
2012 // sincos
2013 assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
2014 "math function with ptr arg not supported yet");
2015 new StoreInst(nval1, aCI->getArgOperand(1), aCI->getIterator());
2016 }
2017
2018 replaceCall(aCI, nval0);
2019 return true;
2020}
2021
2024 AMDGPULibCalls Simplifier(F, AM);
2025 Simplifier.initNativeFuncs();
2026
2027 bool Changed = false;
2028
2029 LLVM_DEBUG(dbgs() << "AMDIC: process function ";
2030 F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
2031
2032 for (auto &BB : F) {
2033 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
2034 // Ignore non-calls.
2036 ++I;
2037
2038 if (CI) {
2039 if (Simplifier.fold(CI))
2040 Changed = true;
2041 }
2042 }
2043 }
2045}
2046
2049 if (UseNative.empty())
2050 return PreservedAnalyses::all();
2051
2052 AMDGPULibCalls Simplifier(F, AM);
2053 Simplifier.initNativeFuncs();
2054
2055 bool Changed = false;
2056 for (auto &BB : F) {
2057 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
2058 // Ignore non-calls.
2060 ++I;
2061 if (CI && Simplifier.useNative(CI))
2062 Changed = true;
2063 }
2064 }
2066}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static const TableEntry tbl_log[]
static const TableEntry tbl_tgamma[]
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_expm1[]
static Constant * getConstantFloatVector(const ArrayRef< APFloat > Values, const Type *Ty)
static const TableEntry tbl_asinpi[]
static const TableEntry tbl_cos[]
#define MATH_SQRT2
static const TableEntry tbl_exp10[]
static CallInst * CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, const Twine &Name="")
static CallInst * CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, Value *Arg2, const Twine &Name="")
static const TableEntry tbl_rsqrt[]
static const TableEntry tbl_atanh[]
static const TableEntry tbl_cosh[]
static const TableEntry tbl_asin[]
static const TableEntry tbl_sinh[]
static const TableEntry tbl_acos[]
static const TableEntry tbl_tan[]
static const TableEntry tbl_cospi[]
static const TableEntry tbl_tanpi[]
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
static bool HasNative(AMDGPULibFunc::EFuncId id)
static Value * emitIsInf(IRBuilder<> &B, Value *val)
ArrayRef< TableEntry > TableRef
static int getVecSize(const AMDGPULibFunc &FInfo)
static Value * emitFastExpYLnx(IRBuilder<> &B, Value *X, Value *Y)
static Value * emitIsInteger(IRBuilder<> &B, Value *Y)
static Value * emitIsEvenInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_sin[]
static const TableEntry tbl_atan[]
static const TableEntry tbl_log2[]
static const TableEntry tbl_acospi[]
static Value * emitPowFixup(IRBuilder<> &B, Value *X, Value *Y, Value *ExpYLnX, PowKind Kind)
Emit special case management epilog code for fast pow, powr, pown, and rootn expansions.
static const TableEntry tbl_sqrt[]
static const TableEntry tbl_asinh[]
#define MATH_E
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
static const TableEntry tbl_acosh[]
static const TableEntry tbl_exp[]
static const TableEntry tbl_cbrt[]
static const TableEntry tbl_sinpi[]
static const TableEntry tbl_atanpi[]
#define MATH_PI
static FunctionType * getPownType(FunctionType *FT)
static const TableEntry tbl_erf[]
static const TableEntry tbl_log10[]
#define MATH_SQRT1_2
static const TableEntry tbl_erfc[]
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
static const TableEntry tbl_tanh[]
static Value * emitIsOddInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_exp2[]
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
loop term fold
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
FunctionAnalysisManager FAM
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static void replaceCall(FPMathOperator *I, Value *With)
bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const
bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const
bool fold(CallInst *CI)
static void replaceCall(Instruction *I, Value *With)
AMDGPULibCalls(Function &F, FunctionAnalysisManager &FAM)
bool useNative(CallInst *CI)
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Wrapper class for AMDGPULibFuncImpl.
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
void setPrefix(ENamePrefix PFX)
bool isCompatibleSignature(const Module &M, const FunctionType *FuncTy) const
EFuncId getId() const
bool isMangled() const
Param * getLeads()
Get leading parameters for mangled lib functions.
void setId(EFuncId Id)
ENamePrefix getPrefix() const
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
bool isNegative() const
Definition APFloat.h:1538
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:5958
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly not equal, like -0.0 and 0.0.
Definition APFloat.h:1521
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:5986
bool isZero() const
Definition APFloat.h:1534
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
A function analysis which provides an AssumptionCache.
static LLVM_ABI Attribute getWithNoFPClass(LLVMContext &Context, FPClassTest Mask)
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
void setCallingConv(CallingConv::ID CC)
void removeParamAttrs(unsigned ArgNo, const AttributeMask &AttrsToRemove)
Removes the attributes from the given argument.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
AttributeSet getParamAttributes(unsigned ArgNo) const
Return the param attributes for this call.
bool isNoInline() const
Return true if the call should not be inlined.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
FunctionType * getFunctionType() const
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI APFloat getElementAsAPFloat(uint64_t i) const
If this is a sequential container of floating point type, return the specified element as an APFloat.
LLVM_ABI Constant * getElementAsConstant(uint64_t i) const
Return a Constant for a specified index's element.
LLVM_ABI uint64_t getNumElements() const
Return the number of elements in the array or vector.
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
const APFloat & getValueAPF() const
Definition Constants.h:463
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
LLVM_ABI bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly not equal, like -0.0 and 0.0.
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
Align getAlignValue() const
Return the constant as an llvm::Align, interpreting 0 as Align(1).
Definition Constants.h:186
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
static LLVM_ABI DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
bool isFast() const
Test if this operation allows all non-strict floating-point transforms.
Definition Operator.h:286
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
Definition Operator.h:302
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:333
bool hasNoInfs() const
Test if this operation's arguments and results are assumed not-infinite.
Definition Operator.h:307
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
Definition Operator.h:328
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setAllowContract(bool B=true)
Definition FMF.h:93
bool none() const
Definition FMF.h:60
bool approxFunc() const
Definition FMF.h:73
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionType * getFunctionType()
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Analysis pass providing the TargetLibraryInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:144
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
void dropAllReferences()
Drop all references to operands.
Definition User.h:324
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_ABI APInt pow(const APInt &X, int64_t N)
Compute X^N for N>=0.
Definition APInt.cpp:3207
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
bool match(Val *V, const Pattern &P)
ap_match< APFloat > m_APFloatAllowPoison(const APFloat *&Res)
Match APFloat while allowing poison in splat vector constants.
initializer< Ty > init(const Ty &Val)
constexpr double ln2
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static double log2(double V)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI bool isKnownIntegral(const Value *V, const SimplifyQuery &SQ, FastMathFlags FMF)
Return true if the floating-point value V is known to be an integer value.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39