LLVM 23.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>
59
60using namespace llvm;
61
62static cl::opt<bool>
63 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
64 cl::desc("Disable autoupgrade of debug info"));
65
66static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
67
68// Report a fatal error along with the
69// Call Instruction which caused the error
70[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
71 CallBase *CI) {
72 CI->print(llvm::errs());
73 llvm::errs() << "\n";
75}
76
77// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
78// changed their type from v4f32 to v2i64.
80 Function *&NewFn) {
81 // Check whether this is an old version of the function, which received
82 // v4f32 arguments.
83 Type *Arg0Type = F->getFunctionType()->getParamType(0);
84 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
85 return false;
86
87 // Yes, it's old, replace it with new version.
88 rename(F);
89 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
90 return true;
91}
92
93// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
94// arguments have changed their type from i32 to i8.
96 Function *&NewFn) {
97 // Check that the last argument is an i32.
98 Type *LastArgType = F->getFunctionType()->getParamType(
99 F->getFunctionType()->getNumParams() - 1);
100 if (!LastArgType->isIntegerTy(32))
101 return false;
102
103 // Move this function aside and map down.
104 rename(F);
105 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
106 return true;
107}
108
109// Upgrade the declaration of fp compare intrinsics that change return type
110// from scalar to vXi1 mask.
112 Function *&NewFn) {
113 // Check if the return type is a vector.
114 if (F->getReturnType()->isVectorTy())
115 return false;
116
117 rename(F);
118 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
119 return true;
120}
121
122// Upgrade the declaration of multiply and add bytes intrinsics whose input
123// arguments' types have changed from vectors of i32 to vectors of i8
125 Function *&NewFn) {
126 // check if input argument type is a vector of i8
127 Type *Arg1Type = F->getFunctionType()->getParamType(1);
128 Type *Arg2Type = F->getFunctionType()->getParamType(2);
129 if (Arg1Type->isVectorTy() &&
130 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
131 Arg2Type->isVectorTy() &&
132 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
133 return false;
134
135 rename(F);
136 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
137 return true;
138}
139
140// Upgrade the declaration of multipy and add words intrinsics whose input
141// arguments' types have changed to vectors of i32 to vectors of i16
143 Function *&NewFn) {
144 // check if input argument type is a vector of i16
145 Type *Arg1Type = F->getFunctionType()->getParamType(1);
146 Type *Arg2Type = F->getFunctionType()->getParamType(2);
147 if (Arg1Type->isVectorTy() &&
148 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
149 Arg2Type->isVectorTy() &&
150 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
151 return false;
152
153 rename(F);
154 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
155 return true;
156}
157
159 Function *&NewFn) {
160 if (F->getReturnType()->getScalarType()->isBFloatTy())
161 return false;
162
163 rename(F);
164 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
165 return true;
166}
167
169 Function *&NewFn) {
170 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
171 return false;
172
173 rename(F);
174 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
175 return true;
176}
177
179 // All of the intrinsics matches below should be marked with which llvm
180 // version started autoupgrading them. At some point in the future we would
181 // like to use this information to remove upgrade code for some older
182 // intrinsics. It is currently undecided how we will determine that future
183 // point.
184 if (Name.consume_front("avx."))
185 return (Name.starts_with("blend.p") || // Added in 3.7
186 Name == "cvt.ps2.pd.256" || // Added in 3.9
187 Name == "cvtdq2.pd.256" || // Added in 3.9
188 Name == "cvtdq2.ps.256" || // Added in 7.0
189 Name.starts_with("movnt.") || // Added in 3.2
190 Name.starts_with("sqrt.p") || // Added in 7.0
191 Name.starts_with("storeu.") || // Added in 3.9
192 Name.starts_with("vbroadcast.s") || // Added in 3.5
193 Name.starts_with("vbroadcastf128") || // Added in 4.0
194 Name.starts_with("vextractf128.") || // Added in 3.7
195 Name.starts_with("vinsertf128.") || // Added in 3.7
196 Name.starts_with("vperm2f128.") || // Added in 6.0
197 Name.starts_with("vpermil.")); // Added in 3.1
198
199 if (Name.consume_front("avx2."))
200 return (Name == "movntdqa" || // Added in 5.0
201 Name.starts_with("pabs.") || // Added in 6.0
202 Name.starts_with("padds.") || // Added in 8.0
203 Name.starts_with("paddus.") || // Added in 8.0
204 Name.starts_with("pblendd.") || // Added in 3.7
205 Name == "pblendw" || // Added in 3.7
206 Name.starts_with("pbroadcast") || // Added in 3.8
207 Name.starts_with("pcmpeq.") || // Added in 3.1
208 Name.starts_with("pcmpgt.") || // Added in 3.1
209 Name.starts_with("pmax") || // Added in 3.9
210 Name.starts_with("pmin") || // Added in 3.9
211 Name.starts_with("pmovsx") || // Added in 3.9
212 Name.starts_with("pmovzx") || // Added in 3.9
213 Name == "pmul.dq" || // Added in 7.0
214 Name == "pmulu.dq" || // Added in 7.0
215 Name.starts_with("psll.dq") || // Added in 3.7
216 Name.starts_with("psrl.dq") || // Added in 3.7
217 Name.starts_with("psubs.") || // Added in 8.0
218 Name.starts_with("psubus.") || // Added in 8.0
219 Name.starts_with("vbroadcast") || // Added in 3.8
220 Name == "vbroadcasti128" || // Added in 3.7
221 Name == "vextracti128" || // Added in 3.7
222 Name == "vinserti128" || // Added in 3.7
223 Name == "vperm2i128"); // Added in 6.0
224
225 if (Name.consume_front("avx512.")) {
226 if (Name.consume_front("mask."))
227 // 'avx512.mask.*'
228 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
229 Name.starts_with("and.") || // Added in 3.9
230 Name.starts_with("andn.") || // Added in 3.9
231 Name.starts_with("broadcast.s") || // Added in 3.9
232 Name.starts_with("broadcastf32x4.") || // Added in 6.0
233 Name.starts_with("broadcastf32x8.") || // Added in 6.0
234 Name.starts_with("broadcastf64x2.") || // Added in 6.0
235 Name.starts_with("broadcastf64x4.") || // Added in 6.0
236 Name.starts_with("broadcasti32x4.") || // Added in 6.0
237 Name.starts_with("broadcasti32x8.") || // Added in 6.0
238 Name.starts_with("broadcasti64x2.") || // Added in 6.0
239 Name.starts_with("broadcasti64x4.") || // Added in 6.0
240 Name.starts_with("cmp.b") || // Added in 5.0
241 Name.starts_with("cmp.d") || // Added in 5.0
242 Name.starts_with("cmp.q") || // Added in 5.0
243 Name.starts_with("cmp.w") || // Added in 5.0
244 Name.starts_with("compress.b") || // Added in 9.0
245 Name.starts_with("compress.d") || // Added in 9.0
246 Name.starts_with("compress.p") || // Added in 9.0
247 Name.starts_with("compress.q") || // Added in 9.0
248 Name.starts_with("compress.store.") || // Added in 7.0
249 Name.starts_with("compress.w") || // Added in 9.0
250 Name.starts_with("conflict.") || // Added in 9.0
251 Name.starts_with("cvtdq2pd.") || // Added in 4.0
252 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
253 Name == "cvtpd2dq.256" || // Added in 7.0
254 Name == "cvtpd2ps.256" || // Added in 7.0
255 Name == "cvtps2pd.128" || // Added in 7.0
256 Name == "cvtps2pd.256" || // Added in 7.0
257 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
258 Name == "cvtqq2ps.256" || // Added in 9.0
259 Name == "cvtqq2ps.512" || // Added in 9.0
260 Name == "cvttpd2dq.256" || // Added in 7.0
261 Name == "cvttps2dq.128" || // Added in 7.0
262 Name == "cvttps2dq.256" || // Added in 7.0
263 Name.starts_with("cvtudq2pd.") || // Added in 4.0
264 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
265 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
266 Name == "cvtuqq2ps.256" || // Added in 9.0
267 Name == "cvtuqq2ps.512" || // Added in 9.0
268 Name.starts_with("dbpsadbw.") || // Added in 7.0
269 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
270 Name.starts_with("expand.b") || // Added in 9.0
271 Name.starts_with("expand.d") || // Added in 9.0
272 Name.starts_with("expand.load.") || // Added in 7.0
273 Name.starts_with("expand.p") || // Added in 9.0
274 Name.starts_with("expand.q") || // Added in 9.0
275 Name.starts_with("expand.w") || // Added in 9.0
276 Name.starts_with("fpclass.p") || // Added in 7.0
277 Name.starts_with("insert") || // Added in 4.0
278 Name.starts_with("load.") || // Added in 3.9
279 Name.starts_with("loadu.") || // Added in 3.9
280 Name.starts_with("lzcnt.") || // Added in 5.0
281 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
282 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
283 Name.starts_with("movddup") || // Added in 3.9
284 Name.starts_with("move.s") || // Added in 4.0
285 Name.starts_with("movshdup") || // Added in 3.9
286 Name.starts_with("movsldup") || // Added in 3.9
287 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
288 Name.starts_with("or.") || // Added in 3.9
289 Name.starts_with("pabs.") || // Added in 6.0
290 Name.starts_with("packssdw.") || // Added in 5.0
291 Name.starts_with("packsswb.") || // Added in 5.0
292 Name.starts_with("packusdw.") || // Added in 5.0
293 Name.starts_with("packuswb.") || // Added in 5.0
294 Name.starts_with("padd.") || // Added in 4.0
295 Name.starts_with("padds.") || // Added in 8.0
296 Name.starts_with("paddus.") || // Added in 8.0
297 Name.starts_with("palignr.") || // Added in 3.9
298 Name.starts_with("pand.") || // Added in 3.9
299 Name.starts_with("pandn.") || // Added in 3.9
300 Name.starts_with("pavg") || // Added in 6.0
301 Name.starts_with("pbroadcast") || // Added in 6.0
302 Name.starts_with("pcmpeq.") || // Added in 3.9
303 Name.starts_with("pcmpgt.") || // Added in 3.9
304 Name.starts_with("perm.df.") || // Added in 3.9
305 Name.starts_with("perm.di.") || // Added in 3.9
306 Name.starts_with("permvar.") || // Added in 7.0
307 Name.starts_with("pmaddubs.w.") || // Added in 7.0
308 Name.starts_with("pmaddw.d.") || // Added in 7.0
309 Name.starts_with("pmax") || // Added in 4.0
310 Name.starts_with("pmin") || // Added in 4.0
311 Name == "pmov.qd.256" || // Added in 9.0
312 Name == "pmov.qd.512" || // Added in 9.0
313 Name == "pmov.wb.256" || // Added in 9.0
314 Name == "pmov.wb.512" || // Added in 9.0
315 Name.starts_with("pmovsx") || // Added in 4.0
316 Name.starts_with("pmovzx") || // Added in 4.0
317 Name.starts_with("pmul.dq.") || // Added in 4.0
318 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
319 Name.starts_with("pmulh.w.") || // Added in 7.0
320 Name.starts_with("pmulhu.w.") || // Added in 7.0
321 Name.starts_with("pmull.") || // Added in 4.0
322 Name.starts_with("pmultishift.qb.") || // Added in 8.0
323 Name.starts_with("pmulu.dq.") || // Added in 4.0
324 Name.starts_with("por.") || // Added in 3.9
325 Name.starts_with("prol.") || // Added in 8.0
326 Name.starts_with("prolv.") || // Added in 8.0
327 Name.starts_with("pror.") || // Added in 8.0
328 Name.starts_with("prorv.") || // Added in 8.0
329 Name.starts_with("pshuf.b.") || // Added in 4.0
330 Name.starts_with("pshuf.d.") || // Added in 3.9
331 Name.starts_with("pshufh.w.") || // Added in 3.9
332 Name.starts_with("pshufl.w.") || // Added in 3.9
333 Name.starts_with("psll.d") || // Added in 4.0
334 Name.starts_with("psll.q") || // Added in 4.0
335 Name.starts_with("psll.w") || // Added in 4.0
336 Name.starts_with("pslli") || // Added in 4.0
337 Name.starts_with("psllv") || // Added in 4.0
338 Name.starts_with("psra.d") || // Added in 4.0
339 Name.starts_with("psra.q") || // Added in 4.0
340 Name.starts_with("psra.w") || // Added in 4.0
341 Name.starts_with("psrai") || // Added in 4.0
342 Name.starts_with("psrav") || // Added in 4.0
343 Name.starts_with("psrl.d") || // Added in 4.0
344 Name.starts_with("psrl.q") || // Added in 4.0
345 Name.starts_with("psrl.w") || // Added in 4.0
346 Name.starts_with("psrli") || // Added in 4.0
347 Name.starts_with("psrlv") || // Added in 4.0
348 Name.starts_with("psub.") || // Added in 4.0
349 Name.starts_with("psubs.") || // Added in 8.0
350 Name.starts_with("psubus.") || // Added in 8.0
351 Name.starts_with("pternlog.") || // Added in 7.0
352 Name.starts_with("punpckh") || // Added in 3.9
353 Name.starts_with("punpckl") || // Added in 3.9
354 Name.starts_with("pxor.") || // Added in 3.9
355 Name.starts_with("shuf.f") || // Added in 6.0
356 Name.starts_with("shuf.i") || // Added in 6.0
357 Name.starts_with("shuf.p") || // Added in 4.0
358 Name.starts_with("sqrt.p") || // Added in 7.0
359 Name.starts_with("store.b.") || // Added in 3.9
360 Name.starts_with("store.d.") || // Added in 3.9
361 Name.starts_with("store.p") || // Added in 3.9
362 Name.starts_with("store.q.") || // Added in 3.9
363 Name.starts_with("store.w.") || // Added in 3.9
364 Name == "store.ss" || // Added in 7.0
365 Name.starts_with("storeu.") || // Added in 3.9
366 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
367 Name.starts_with("ucmp.") || // Added in 5.0
368 Name.starts_with("unpckh.") || // Added in 3.9
369 Name.starts_with("unpckl.") || // Added in 3.9
370 Name.starts_with("valign.") || // Added in 4.0
371 Name == "vcvtph2ps.128" || // Added in 11.0
372 Name == "vcvtph2ps.256" || // Added in 11.0
373 Name.starts_with("vextract") || // Added in 4.0
374 Name.starts_with("vfmadd.") || // Added in 7.0
375 Name.starts_with("vfmaddsub.") || // Added in 7.0
376 Name.starts_with("vfnmadd.") || // Added in 7.0
377 Name.starts_with("vfnmsub.") || // Added in 7.0
378 Name.starts_with("vpdpbusd.") || // Added in 7.0
379 Name.starts_with("vpdpbusds.") || // Added in 7.0
380 Name.starts_with("vpdpwssd.") || // Added in 7.0
381 Name.starts_with("vpdpwssds.") || // Added in 7.0
382 Name.starts_with("vpermi2var.") || // Added in 7.0
383 Name.starts_with("vpermil.p") || // Added in 3.9
384 Name.starts_with("vpermilvar.") || // Added in 4.0
385 Name.starts_with("vpermt2var.") || // Added in 7.0
386 Name.starts_with("vpmadd52") || // Added in 7.0
387 Name.starts_with("vpshld.") || // Added in 7.0
388 Name.starts_with("vpshldv.") || // Added in 8.0
389 Name.starts_with("vpshrd.") || // Added in 7.0
390 Name.starts_with("vpshrdv.") || // Added in 8.0
391 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
392 Name.starts_with("xor.")); // Added in 3.9
393
394 if (Name.consume_front("mask3."))
395 // 'avx512.mask3.*'
396 return (Name.starts_with("vfmadd.") || // Added in 7.0
397 Name.starts_with("vfmaddsub.") || // Added in 7.0
398 Name.starts_with("vfmsub.") || // Added in 7.0
399 Name.starts_with("vfmsubadd.") || // Added in 7.0
400 Name.starts_with("vfnmsub.")); // Added in 7.0
401
402 if (Name.consume_front("maskz."))
403 // 'avx512.maskz.*'
404 return (Name.starts_with("pternlog.") || // Added in 7.0
405 Name.starts_with("vfmadd.") || // Added in 7.0
406 Name.starts_with("vfmaddsub.") || // Added in 7.0
407 Name.starts_with("vpdpbusd.") || // Added in 7.0
408 Name.starts_with("vpdpbusds.") || // Added in 7.0
409 Name.starts_with("vpdpwssd.") || // Added in 7.0
410 Name.starts_with("vpdpwssds.") || // Added in 7.0
411 Name.starts_with("vpermt2var.") || // Added in 7.0
412 Name.starts_with("vpmadd52") || // Added in 7.0
413 Name.starts_with("vpshldv.") || // Added in 8.0
414 Name.starts_with("vpshrdv.")); // Added in 8.0
415
416 // 'avx512.*'
417 return (Name == "movntdqa" || // Added in 5.0
418 Name == "pmul.dq.512" || // Added in 7.0
419 Name == "pmulu.dq.512" || // Added in 7.0
420 Name.starts_with("broadcastm") || // Added in 6.0
421 Name.starts_with("cmp.p") || // Added in 12.0
422 Name.starts_with("cvtb2mask.") || // Added in 7.0
423 Name.starts_with("cvtd2mask.") || // Added in 7.0
424 Name.starts_with("cvtmask2") || // Added in 5.0
425 Name.starts_with("cvtq2mask.") || // Added in 7.0
426 Name == "cvtusi2sd" || // Added in 7.0
427 Name.starts_with("cvtw2mask.") || // Added in 7.0
428 Name == "kand.w" || // Added in 7.0
429 Name == "kandn.w" || // Added in 7.0
430 Name == "knot.w" || // Added in 7.0
431 Name == "kor.w" || // Added in 7.0
432 Name == "kortestc.w" || // Added in 7.0
433 Name == "kortestz.w" || // Added in 7.0
434 Name.starts_with("kunpck") || // added in 6.0
435 Name == "kxnor.w" || // Added in 7.0
436 Name == "kxor.w" || // Added in 7.0
437 Name.starts_with("padds.") || // Added in 8.0
438 Name.starts_with("pbroadcast") || // Added in 3.9
439 Name.starts_with("prol") || // Added in 8.0
440 Name.starts_with("pror") || // Added in 8.0
441 Name.starts_with("psll.dq") || // Added in 3.9
442 Name.starts_with("psrl.dq") || // Added in 3.9
443 Name.starts_with("psubs.") || // Added in 8.0
444 Name.starts_with("ptestm") || // Added in 6.0
445 Name.starts_with("ptestnm") || // Added in 6.0
446 Name.starts_with("storent.") || // Added in 3.9
447 Name.starts_with("vbroadcast.s") || // Added in 7.0
448 Name.starts_with("vpshld.") || // Added in 8.0
449 Name.starts_with("vpshrd.")); // Added in 8.0
450 }
451
452 if (Name.consume_front("fma."))
453 return (Name.starts_with("vfmadd.") || // Added in 7.0
454 Name.starts_with("vfmsub.") || // Added in 7.0
455 Name.starts_with("vfmsubadd.") || // Added in 7.0
456 Name.starts_with("vfnmadd.") || // Added in 7.0
457 Name.starts_with("vfnmsub.")); // Added in 7.0
458
459 if (Name.consume_front("fma4."))
460 return Name.starts_with("vfmadd.s"); // Added in 7.0
461
462 if (Name.consume_front("sse."))
463 return (Name == "add.ss" || // Added in 4.0
464 Name == "cvtsi2ss" || // Added in 7.0
465 Name == "cvtsi642ss" || // Added in 7.0
466 Name == "div.ss" || // Added in 4.0
467 Name == "mul.ss" || // Added in 4.0
468 Name.starts_with("sqrt.p") || // Added in 7.0
469 Name == "sqrt.ss" || // Added in 7.0
470 Name.starts_with("storeu.") || // Added in 3.9
471 Name == "sub.ss"); // Added in 4.0
472
473 if (Name.consume_front("sse2."))
474 return (Name == "add.sd" || // Added in 4.0
475 Name == "cvtdq2pd" || // Added in 3.9
476 Name == "cvtdq2ps" || // Added in 7.0
477 Name == "cvtps2pd" || // Added in 3.9
478 Name == "cvtsi2sd" || // Added in 7.0
479 Name == "cvtsi642sd" || // Added in 7.0
480 Name == "cvtss2sd" || // Added in 7.0
481 Name == "div.sd" || // Added in 4.0
482 Name == "mul.sd" || // Added in 4.0
483 Name.starts_with("padds.") || // Added in 8.0
484 Name.starts_with("paddus.") || // Added in 8.0
485 Name.starts_with("pcmpeq.") || // Added in 3.1
486 Name.starts_with("pcmpgt.") || // Added in 3.1
487 Name == "pmaxs.w" || // Added in 3.9
488 Name == "pmaxu.b" || // Added in 3.9
489 Name == "pmins.w" || // Added in 3.9
490 Name == "pminu.b" || // Added in 3.9
491 Name == "pmulu.dq" || // Added in 7.0
492 Name.starts_with("pshuf") || // Added in 3.9
493 Name.starts_with("psll.dq") || // Added in 3.7
494 Name.starts_with("psrl.dq") || // Added in 3.7
495 Name.starts_with("psubs.") || // Added in 8.0
496 Name.starts_with("psubus.") || // Added in 8.0
497 Name.starts_with("sqrt.p") || // Added in 7.0
498 Name == "sqrt.sd" || // Added in 7.0
499 Name == "storel.dq" || // Added in 3.9
500 Name.starts_with("storeu.") || // Added in 3.9
501 Name == "sub.sd"); // Added in 4.0
502
503 if (Name.consume_front("sse41."))
504 return (Name.starts_with("blendp") || // Added in 3.7
505 Name == "movntdqa" || // Added in 5.0
506 Name == "pblendw" || // Added in 3.7
507 Name == "pmaxsb" || // Added in 3.9
508 Name == "pmaxsd" || // Added in 3.9
509 Name == "pmaxud" || // Added in 3.9
510 Name == "pmaxuw" || // Added in 3.9
511 Name == "pminsb" || // Added in 3.9
512 Name == "pminsd" || // Added in 3.9
513 Name == "pminud" || // Added in 3.9
514 Name == "pminuw" || // Added in 3.9
515 Name.starts_with("pmovsx") || // Added in 3.8
516 Name.starts_with("pmovzx") || // Added in 3.9
517 Name == "pmuldq"); // Added in 7.0
518
519 if (Name.consume_front("sse42."))
520 return Name == "crc32.64.8"; // Added in 3.4
521
522 if (Name.consume_front("sse4a."))
523 return Name.starts_with("movnt."); // Added in 3.9
524
525 if (Name.consume_front("ssse3."))
526 return (Name == "pabs.b.128" || // Added in 6.0
527 Name == "pabs.d.128" || // Added in 6.0
528 Name == "pabs.w.128"); // Added in 6.0
529
530 if (Name.consume_front("xop."))
531 return (Name == "vpcmov" || // Added in 3.8
532 Name == "vpcmov.256" || // Added in 5.0
533 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
534 Name.starts_with("vprot")); // Added in 8.0
535
536 if (Name.consume_front("bmi."))
537 return (Name.starts_with("pdep.") || // Added in 23.0
538 Name.starts_with("pext.")); // Added in 23.0
539
540 return (Name == "addcarry.u32" || // Added in 8.0
541 Name == "addcarry.u64" || // Added in 8.0
542 Name == "addcarryx.u32" || // Added in 8.0
543 Name == "addcarryx.u64" || // Added in 8.0
544 Name == "subborrow.u32" || // Added in 8.0
545 Name == "subborrow.u64" || // Added in 8.0
546 Name.starts_with("vcvtph2ps.")); // Added in 11.0
547}
548
550 Function *&NewFn) {
551 // Only handle intrinsics that start with "x86.".
552 if (!Name.consume_front("x86."))
553 return false;
554
555 if (shouldUpgradeX86Intrinsic(F, Name)) {
556 NewFn = nullptr;
557 return true;
558 }
559
560 if (Name == "rdtscp") { // Added in 8.0
561 // If this intrinsic has 0 operands, it's the new version.
562 if (F->getFunctionType()->getNumParams() == 0)
563 return false;
564
565 rename(F);
566 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
567 Intrinsic::x86_rdtscp);
568 return true;
569 }
570
572
573 // SSE4.1 ptest functions may have an old signature.
574 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
576 .Case("c", Intrinsic::x86_sse41_ptestc)
577 .Case("z", Intrinsic::x86_sse41_ptestz)
578 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
581 return upgradePTESTIntrinsic(F, ID, NewFn);
582
583 return false;
584 }
585
586 // Several blend and other instructions with masks used the wrong number of
587 // bits.
588
589 // Added in 3.6
591 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
592 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
593 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
594 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
595 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
596 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
599 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
600
601 if (Name.consume_front("avx512.")) {
602 if (Name.consume_front("mask.cmp.")) {
603 // Added in 7.0
605 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
606 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
607 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
608 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
609 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
610 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
613 return upgradeX86MaskedFPCompare(F, ID, NewFn);
614 } else if (Name.starts_with("vpdpbusd.") ||
615 Name.starts_with("vpdpbusds.")) {
616 // Added in 21.1
618 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
619 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
620 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
621 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
622 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
623 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
626 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
627 } else if (Name.starts_with("vpdpwssd.") ||
628 Name.starts_with("vpdpwssds.")) {
629 // Added in 21.1
631 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
632 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
633 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
634 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
635 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
636 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
639 return upgradeX86MultiplyAddWords(F, ID, NewFn);
640 }
641 return false; // No other 'x86.avx512.*'.
642 }
643
644 if (Name.consume_front("avx2.")) {
645 if (Name.consume_front("vpdpb")) {
646 // Added in 21.1
648 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
649 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
650 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
651 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
652 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
653 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
654 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
655 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
656 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
657 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
658 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
659 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
662 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
663 } else if (Name.consume_front("vpdpw")) {
664 // Added in 21.1
666 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
667 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
668 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
669 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
670 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
671 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
672 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
673 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
674 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
675 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
676 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
677 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
680 return upgradeX86MultiplyAddWords(F, ID, NewFn);
681 }
682 return false; // No other 'x86.avx2.*'
683 }
684
685 if (Name.consume_front("avx10.")) {
686 if (Name.consume_front("vpdpb")) {
687 // Added in 21.1
689 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
690 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
691 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
692 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
693 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
694 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
697 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
698 } else if (Name.consume_front("vpdpw")) {
700 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
701 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
702 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
703 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
704 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
705 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
708 return upgradeX86MultiplyAddWords(F, ID, NewFn);
709 }
710 return false; // No other 'x86.avx10.*'
711 }
712
713 if (Name.consume_front("avx512bf16.")) {
714 // Added in 9.0
716 .Case("cvtne2ps2bf16.128",
717 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
718 .Case("cvtne2ps2bf16.256",
719 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
720 .Case("cvtne2ps2bf16.512",
721 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
722 .Case("mask.cvtneps2bf16.128",
723 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
724 .Case("cvtneps2bf16.256",
725 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
726 .Case("cvtneps2bf16.512",
727 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
730 return upgradeX86BF16Intrinsic(F, ID, NewFn);
731
732 // Added in 9.0
734 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
735 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
736 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
739 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
740 return false; // No other 'x86.avx512bf16.*'.
741 }
742
743 if (Name.consume_front("xop.")) {
745 if (Name.starts_with("vpermil2")) { // Added in 3.9
746 // Upgrade any XOP PERMIL2 index operand still using a float/double
747 // vector.
748 auto Idx = F->getFunctionType()->getParamType(2);
749 if (Idx->isFPOrFPVectorTy()) {
750 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
751 unsigned EltSize = Idx->getScalarSizeInBits();
752 if (EltSize == 64 && IdxSize == 128)
753 ID = Intrinsic::x86_xop_vpermil2pd;
754 else if (EltSize == 32 && IdxSize == 128)
755 ID = Intrinsic::x86_xop_vpermil2ps;
756 else if (EltSize == 64 && IdxSize == 256)
757 ID = Intrinsic::x86_xop_vpermil2pd_256;
758 else
759 ID = Intrinsic::x86_xop_vpermil2ps_256;
760 }
761 } else if (F->arg_size() == 2)
762 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
764 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
765 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
767
769 rename(F);
770 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
771 return true;
772 }
773 return false; // No other 'x86.xop.*'
774 }
775
776 if (Name == "seh.recoverfp") {
777 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
778 Intrinsic::eh_recoverfp);
779 return true;
780 }
781
782 return false;
783}
784
785// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
786// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
788 StringRef Name,
789 Function *&NewFn) {
790 if (Name.starts_with("rbit")) {
791 // '(arm|aarch64).rbit'.
793 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
794 return true;
795 }
796
797 if (Name == "thread.pointer") {
798 // '(arm|aarch64).thread.pointer'.
800 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
801 return true;
802 }
803
804 bool Neon = Name.consume_front("neon.");
805 if (Neon) {
806 // '(arm|aarch64).neon.*'.
807 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
808 // v16i8 respectively.
809 if (Name.consume_front("bfdot.")) {
810 // (arm|aarch64).neon.bfdot.*'.
813 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
814 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
815 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
818 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
819 assert((OperandWidth == 64 || OperandWidth == 128) &&
820 "Unexpected operand width");
821 LLVMContext &Ctx = F->getParent()->getContext();
822 std::array<Type *, 2> Tys{
823 {F->getReturnType(),
824 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
825 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
826 return true;
827 }
828 return false; // No other '(arm|aarch64).neon.bfdot.*'.
829 }
830
831 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
832 // anymore and accept v8bf16 instead of v16i8.
833 if (Name.consume_front("bfm")) {
834 // (arm|aarch64).neon.bfm*'.
835 if (Name.consume_back(".v4f32.v16i8")) {
836 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
839 .Case("mla",
840 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
841 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
842 .Case("lalb",
843 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
844 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
845 .Case("lalt",
846 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
847 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
850 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
851 return true;
852 }
853 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
854 }
855 return false; // No other '(arm|aarch64).neon.bfm*.
856 }
857 // Continue on to Aarch64 Neon or Arm Neon.
858 }
859 // Continue on to Arm or Aarch64.
860
861 if (IsArm) {
862 // 'arm.*'.
863 if (Neon) {
864 // 'arm.neon.*'.
866 .StartsWith("vclz.", Intrinsic::ctlz)
867 .StartsWith("vcnt.", Intrinsic::ctpop)
868 .StartsWith("vqadds.", Intrinsic::sadd_sat)
869 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
870 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
871 .StartsWith("vqsubu.", Intrinsic::usub_sat)
872 .StartsWith("vrinta.", Intrinsic::round)
873 .StartsWith("vrintn.", Intrinsic::roundeven)
874 .StartsWith("vrintm.", Intrinsic::floor)
875 .StartsWith("vrintp.", Intrinsic::ceil)
876 .StartsWith("vrintx.", Intrinsic::rint)
877 .StartsWith("vrintz.", Intrinsic::trunc)
880 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
881 F->arg_begin()->getType());
882 return true;
883 }
884
885 if (Name.consume_front("vst")) {
886 // 'arm.neon.vst*'.
887 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
889 if (vstRegex.match(Name, &Groups)) {
890 static const Intrinsic::ID StoreInts[] = {
891 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
892 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
893
894 static const Intrinsic::ID StoreLaneInts[] = {
895 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
896 Intrinsic::arm_neon_vst4lane};
897
898 auto fArgs = F->getFunctionType()->params();
899 Type *Tys[] = {fArgs[0], fArgs[1]};
900 if (Groups[1].size() == 1)
902 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
903 else
905 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
906 return true;
907 }
908 return false; // No other 'arm.neon.vst*'.
909 }
910
911 return false; // No other 'arm.neon.*'.
912 }
913
914 if (Name.consume_front("mve.")) {
915 // 'arm.mve.*'.
916 if (Name == "vctp64") {
917 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
918 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
919 // the function and deal with it below in UpgradeIntrinsicCall.
920 rename(F);
921 return true;
922 }
923 return false; // Not 'arm.mve.vctp64'.
924 }
925
926 if (Name.starts_with("vrintn.v")) {
928 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
929 return true;
930 }
931
932 // These too are changed to accept a v2i1 instead of the old v4i1.
933 if (Name.consume_back(".v4i1")) {
934 // 'arm.mve.*.v4i1'.
935 if (Name.consume_back(".predicated.v2i64.v4i32"))
936 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
937 return Name == "mull.int" || Name == "vqdmull";
938
939 if (Name.consume_back(".v2i64")) {
940 // 'arm.mve.*.v2i64.v4i1'
941 bool IsGather = Name.consume_front("vldr.gather.");
942 if (IsGather || Name.consume_front("vstr.scatter.")) {
943 if (Name.consume_front("base.")) {
944 // Optional 'wb.' prefix.
945 Name.consume_front("wb.");
946 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
947 // predicated.v2i64.v2i64.v4i1'.
948 return Name == "predicated.v2i64";
949 }
950
951 if (Name.consume_front("offset.predicated."))
952 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
953 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
954
955 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
956 return false;
957 }
958
959 return false; // No other 'arm.mve.*.v2i64.v4i1'.
960 }
961 return false; // No other 'arm.mve.*.v4i1'.
962 }
963 return false; // No other 'arm.mve.*'.
964 }
965
966 if (Name.consume_front("cde.vcx")) {
967 // 'arm.cde.vcx*'.
968 if (Name.consume_back(".predicated.v2i64.v4i1"))
969 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
970 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
971 Name == "3q" || Name == "3qa";
972
973 return false; // No other 'arm.cde.vcx*'.
974 }
975 } else {
976 // 'aarch64.*'.
977 if (Neon) {
978 // 'aarch64.neon.*'.
980 .StartsWith("frintn", Intrinsic::roundeven)
981 .StartsWith("rbit", Intrinsic::bitreverse)
984 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
985 F->arg_begin()->getType());
986 return true;
987 }
988
989 if (Name.starts_with("addp")) {
990 // 'aarch64.neon.addp*'.
991 if (F->arg_size() != 2)
992 return false; // Invalid IR.
993 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
994 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
996 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
997 return true;
998 }
999 }
1000
1001 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
1002 if (Name.starts_with("bfcvt")) {
1003 NewFn = nullptr;
1004 return true;
1005 }
1006
1007 // vcvtfp2hf and vcvthf2fp -> fpext and fptrunc
1008 if (Name == "vcvtfp2hf" || Name == "vcvthf2fp") {
1009 NewFn = nullptr;
1010 return true;
1011 }
1012
1013 return false; // No other 'aarch64.neon.*'.
1014 }
1015 if (Name.consume_front("sve.")) {
1016 // 'aarch64.sve.*'.
1017 if (Name.consume_front("bf")) {
1018 if (Name == "mmla") {
1019 Type *Tys[] = {F->getReturnType(),
1020 std::next(F->arg_begin())->getType()};
1022 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1023 return true;
1024 }
1025 if (Name.consume_back(".lane")) {
1026 // 'aarch64.sve.bf*.lane'.
1029 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1030 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1031 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1034 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1035 return true;
1036 }
1037 return false; // No other 'aarch64.sve.bf*.lane'.
1038 }
1039 return false; // No other 'aarch64.sve.bf*'.
1040 }
1041
1042 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1043 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1044 NewFn = nullptr;
1045 return true;
1046 }
1047
1048 if (Name.consume_front("addqv")) {
1049 // 'aarch64.sve.addqv'.
1050 if (!F->getReturnType()->isFPOrFPVectorTy())
1051 return false;
1052
1053 auto Args = F->getFunctionType()->params();
1054 Type *Tys[] = {F->getReturnType(), Args[1]};
1056 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1057 return true;
1058 }
1059
1060 if (Name.consume_front("ld")) {
1061 // 'aarch64.sve.ld*'.
1062 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1063 if (LdRegex.match(Name)) {
1064 Type *ScalarTy =
1065 cast<VectorType>(F->getReturnType())->getElementType();
1066 ElementCount EC =
1067 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
1068 assert(F->arg_size() == 2 &&
1069 "Expected 2 arguments for ld* intrinsic.");
1070 Type *PtrTy = F->getArg(1)->getType();
1071 Type *Ty = VectorType::get(ScalarTy, EC);
1072 static const Intrinsic::ID LoadIDs[] = {
1073 Intrinsic::aarch64_sve_ld2_sret,
1074 Intrinsic::aarch64_sve_ld3_sret,
1075 Intrinsic::aarch64_sve_ld4_sret,
1076 };
1078 F->getParent(), LoadIDs[Name[0] - '2'], {Ty, PtrTy});
1079 return true;
1080 }
1081 return false; // No other 'aarch64.sve.ld*'.
1082 }
1083
1084 if (Name.consume_front("tuple.")) {
1085 // 'aarch64.sve.tuple.*'.
1086 if (Name.starts_with("get")) {
1087 // 'aarch64.sve.tuple.get*'.
1088 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1090 F->getParent(), Intrinsic::vector_extract, Tys);
1091 return true;
1092 }
1093
1094 if (Name.starts_with("set")) {
1095 // 'aarch64.sve.tuple.set*'.
1096 auto Args = F->getFunctionType()->params();
1097 Type *Tys[] = {Args[0], Args[2], Args[1]};
1099 F->getParent(), Intrinsic::vector_insert, Tys);
1100 return true;
1101 }
1102
1103 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1104 if (CreateTupleRegex.match(Name)) {
1105 // 'aarch64.sve.tuple.create*'.
1106 auto Args = F->getFunctionType()->params();
1107 Type *Tys[] = {F->getReturnType(), Args[1]};
1109 F->getParent(), Intrinsic::vector_insert, Tys);
1110 return true;
1111 }
1112 return false; // No other 'aarch64.sve.tuple.*'.
1113 }
1114
1115 if (Name.starts_with("rev.nxv")) {
1116 // 'aarch64.sve.rev.<Ty>'
1118 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1119 return true;
1120 }
1121
1122 return false; // No other 'aarch64.sve.*'.
1123 }
1124 if (Name.consume_front("sme.")) {
1125 // 'aarch64.sme.*'.
1126 if (Name.consume_front("ftmopa.")) {
1127 // The FP8 FTMOPA intrinsics were split out from the non-FP8 FTMOPA
1128 // intrinsics to model their FPMR dependency.
1131 .Case("za16.nxv16i8", Intrinsic::aarch64_sme_fp8_ftmopa_za16)
1132 .Case("za32.nxv16i8", Intrinsic::aarch64_sme_fp8_ftmopa_za32)
1135 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1136 return true;
1137 }
1138 return false; // No other 'aarch64.sme.ftmopa.*'.
1139 }
1140
1141 return false; // No other 'aarch64.sme.*'.
1142 }
1143 }
1144 return false; // No other 'arm.*', 'aarch64.*'.
1145}
1146
1148 StringRef Name) {
1149 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1152 .Case("im2col.3d",
1153 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1154 .Case("im2col.4d",
1155 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1156 .Case("im2col.5d",
1157 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1158 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1159 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1160 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1161 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1162 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1164
1166 return ID;
1167
1168 // These intrinsics may need upgrade for two reasons:
1169 // (1) When the address-space of the first argument is shared[AS=3]
1170 // (and we upgrade it to use shared_cluster address-space[AS=7])
1171 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1173 return ID;
1174
1175 // (2) When there are only two boolean flag arguments at the end:
1176 //
1177 // The last three parameters of the older version of these
1178 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1179 //
1180 // The newer version reads as:
1181 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1182 //
1183 // So, when the type of the [N-3]rd argument is "not i1", then
1184 // it is the older version and we need to upgrade.
1185 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1186 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1187 if (!ArgType->isIntegerTy(1))
1188 return ID;
1189 }
1190
1192}
1193
1195 StringRef Name) {
1196 if (Name.consume_front("mapa.shared.cluster"))
1197 if (F->getReturnType()->getPointerAddressSpace() ==
1199 return Intrinsic::nvvm_mapa_shared_cluster;
1200
1201 if (Name.consume_front("cp.async.bulk.")) {
1204 .Case("global.to.shared.cluster",
1205 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1206 .Case("shared.cta.to.cluster",
1207 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1209
1211 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1213 return ID;
1214 }
1215
1217}
1218
1220 if (Name.consume_front("fma.rn."))
1221 return StringSwitch<Intrinsic::ID>(Name)
1222 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1223 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1224 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1225 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1227
1228 if (Name.consume_front("fmax."))
1229 return StringSwitch<Intrinsic::ID>(Name)
1230 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1231 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1232 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1233 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1234 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1235 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1236 .Case("ftz.nan.xorsign.abs.bf16",
1237 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1238 .Case("ftz.nan.xorsign.abs.bf16x2",
1239 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1240 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1241 .Case("ftz.xorsign.abs.bf16x2",
1242 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1243 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1244 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1245 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1246 .Case("nan.xorsign.abs.bf16x2",
1247 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1248 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1249 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1251
1252 if (Name.consume_front("fmin."))
1253 return StringSwitch<Intrinsic::ID>(Name)
1254 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1255 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1256 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1257 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1258 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1259 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1260 .Case("ftz.nan.xorsign.abs.bf16",
1261 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1262 .Case("ftz.nan.xorsign.abs.bf16x2",
1263 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1264 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1265 .Case("ftz.xorsign.abs.bf16x2",
1266 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1267 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1268 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1269 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1270 .Case("nan.xorsign.abs.bf16x2",
1271 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1272 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1273 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1275
1276 if (Name.consume_front("neg."))
1277 return StringSwitch<Intrinsic::ID>(Name)
1278 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1279 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1281
1283}
1284
1286 return Name.consume_front("local") || Name.consume_front("shared") ||
1287 Name.consume_front("global") || Name.consume_front("constant") ||
1288 Name.consume_front("param");
1289}
1290
1292 const FunctionType *FuncTy) {
1293 Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
1294 if (Name.starts_with("to.fp16")) {
1295 return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
1296 HalfTy) &&
1297 CastInst::castIsValid(Instruction::BitCast, HalfTy,
1298 FuncTy->getReturnType());
1299 }
1300
1301 if (Name.starts_with("from.fp16")) {
1302 return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
1303 HalfTy) &&
1304 CastInst::castIsValid(Instruction::FPExt, HalfTy,
1305 FuncTy->getReturnType());
1306 }
1307
1308 return false;
1309}
1310
// NOTE(review): the signature line(s) of this helper were lost from this
// listing, so its name and where IID comes from cannot be read here. The
// visible body reads F and IID and writes NewFn, and returns bool -- confirm
// the exact declaration against the upstream file.
//
// Visible behaviour: returns true, with NewFn set to the canonical full
// declaration, only when IID is a non-overloaded intrinsic that has default
// argument values and F declares fewer arguments than the full declaration
// but at least FirstDefault of them. Every other case returns false.
1313 if (IID == Intrinsic::not_intrinsic)
1314 return false;
1315
1316 auto [FirstDefault, Defaults] = Intrinsic::getAllDefaultArgValues(IID);
1317 if (Defaults.empty())
1318 return false;
1319
1320 // Overloaded intrinsics are out of scope for the default-arg feature
1321 // and will be supported in a follow-up.
1322 if (Intrinsic::isOverloaded(IID))
1323 return false;
1324
1325 // Get the canonical full declaration for this intrinsic.
1326 Function *FullDecl = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1327
1328 // If the existing declaration already has all args, nothing to upgrade
1329 if (F->arg_size() >= FullDecl->arg_size())
1330 return false;
1331
1332 // Defaults are a contiguous trailing block, so checking the first missing
1333 // argument is enough.
// F must already supply every argument that precedes the defaulted block.
1334 if (F->arg_size() < FirstDefault)
1335 return false;
1336
1337 NewFn = FullDecl;
1338 return true;
1339}
1340
1342 bool CanUpgradeDebugIntrinsicsToRecords) {
1343 assert(F && "Illegal to upgrade a non-existent Function.");
1344
1345 StringRef Name = F->getName();
1346
1347 // Quickly eliminate it, if it's not a candidate.
1348 if (!Name.consume_front("llvm.") || Name.empty())
1349 return false;
1350
1351 switch (Name[0]) {
1352 default: break;
1353 case 'a': {
1354 bool IsArm = Name.consume_front("arm.");
1355 if (IsArm || Name.consume_front("aarch64.")) {
1356 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1357 return true;
1358 break;
1359 }
1360
1361 if (Name.consume_front("amdgcn.")) {
1362 if (Name == "alignbit") {
1363 // Target specific intrinsic became redundant
1365 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1366 return true;
1367 }
1368
1369 if (Name.consume_front("atomic.")) {
1370 if (Name.starts_with("inc") || Name.starts_with("dec") ||
1371 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1372 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1373 // and usub_sat so there's no new declaration.
1374 NewFn = nullptr;
1375 return true;
1376 }
1377 break; // No other 'amdgcn.atomic.*'
1378 }
1379
1380 switch (F->getIntrinsicID()) {
1381 default:
1382 break;
1383 // Legacy wmma iu intrinsics without the optional clamp operand.
1384 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1385 if (F->arg_size() == 7) {
1386 NewFn = nullptr;
1387 return true;
1388 }
1389 break;
1390 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1391 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1392 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1393 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1394 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1395 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1396 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1397 if (F->arg_size() == 8) {
1398 NewFn = nullptr;
1399 return true;
1400 }
1401 break;
1402 }
1403
1404 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1405 Name.consume_front("flat.atomic.")) {
1406 if (Name.starts_with("fadd") ||
1407 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1408 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1409 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1410 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1411 // declaration.
1412 NewFn = nullptr;
1413 return true;
1414 }
1415 }
1416
1417 if (Name.starts_with("ldexp.")) {
1418 // Target specific intrinsic became redundant
1420 F->getParent(), Intrinsic::ldexp,
1421 {F->getReturnType(), F->getArg(1)->getType()});
1422 return true;
1423 }
1424 break; // No other 'amdgcn.*'
1425 }
1426
1427 break;
1428 }
1429 case 'c': {
1430 if (F->arg_size() == 1) {
1431 if (Name.consume_front("convert.")) {
1432 if (convertIntrinsicValidType(Name, F->getFunctionType())) {
1433 NewFn = nullptr;
1434 return true;
1435 }
1436 }
1437
1439 .StartsWith("ctlz.", Intrinsic::ctlz)
1440 .StartsWith("cttz.", Intrinsic::cttz)
1443 rename(F);
1444 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1445 F->arg_begin()->getType());
1446 return true;
1447 }
1448 }
1449
1450 if (F->arg_size() == 2 && Name == "coro.end") {
1451 rename(F);
1452 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1453 Intrinsic::coro_end);
1454 return true;
1455 }
1456
1457 break;
1458 }
1459 case 'd':
1460 if (Name.consume_front("dbg.")) {
1461 // Mark debug intrinsics for upgrade to new debug format.
1462 if (CanUpgradeDebugIntrinsicsToRecords) {
1463 if (Name == "addr" || Name == "value" || Name == "assign" ||
1464 Name == "declare" || Name == "label") {
1465 // There's no function to replace these with.
1466 NewFn = nullptr;
1467 // But we do want these to get upgraded.
1468 return true;
1469 }
1470 }
1471 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1472 // converted to DbgVariableRecords later.
1473 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1474 rename(F);
1475 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1476 Intrinsic::dbg_value);
1477 return true;
1478 }
1479 break; // No other 'dbg.*'.
1480 }
1481 break;
1482 case 'e':
1483 if (Name.consume_front("experimental.vector.")) {
1486 // Skip over extract.last.active, otherwise it will be 'upgraded'
1487 // to a regular vector extract which is a different operation.
1488 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1489 .StartsWith("extract.", Intrinsic::vector_extract)
1490 .StartsWith("insert.", Intrinsic::vector_insert)
1491 .StartsWith("reverse.", Intrinsic::vector_reverse)
1492 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1493 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1494 .StartsWith("partial.reduce.add",
1495 Intrinsic::vector_partial_reduce_add)
1498 const auto *FT = F->getFunctionType();
1500 if (ID == Intrinsic::vector_extract ||
1501 ID == Intrinsic::vector_interleave2)
1502 // Extracting overloads the return type.
1503 Tys.push_back(FT->getReturnType());
1504 if (ID != Intrinsic::vector_interleave2)
1505 Tys.push_back(FT->getParamType(0));
1506 if (ID == Intrinsic::vector_insert ||
1507 ID == Intrinsic::vector_partial_reduce_add)
1508 // Inserting overloads the inserted type.
1509 Tys.push_back(FT->getParamType(1));
1510 rename(F);
1511 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1512 return true;
1513 }
1514
1515 if (Name.consume_front("reduce.")) {
1517 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1518 if (R.match(Name, &Groups))
1520 .Case("add", Intrinsic::vector_reduce_add)
1521 .Case("mul", Intrinsic::vector_reduce_mul)
1522 .Case("and", Intrinsic::vector_reduce_and)
1523 .Case("or", Intrinsic::vector_reduce_or)
1524 .Case("xor", Intrinsic::vector_reduce_xor)
1525 .Case("smax", Intrinsic::vector_reduce_smax)
1526 .Case("smin", Intrinsic::vector_reduce_smin)
1527 .Case("umax", Intrinsic::vector_reduce_umax)
1528 .Case("umin", Intrinsic::vector_reduce_umin)
1529 .Case("fmax", Intrinsic::vector_reduce_fmax)
1530 .Case("fmin", Intrinsic::vector_reduce_fmin)
1532
1533 bool V2 = false;
1535 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1536 Groups.clear();
1537 V2 = true;
1538 if (R2.match(Name, &Groups))
1540 .Case("fadd", Intrinsic::vector_reduce_fadd)
1541 .Case("fmul", Intrinsic::vector_reduce_fmul)
1543 }
1545 rename(F);
1546 auto Args = F->getFunctionType()->params();
1547 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1548 {Args[V2 ? 1 : 0]});
1549 return true;
1550 }
1551 break; // No other 'expermental.vector.reduce.*'.
1552 }
1553
1554 if (Name.consume_front("splice"))
1555 return true;
1556 break; // No other 'experimental.vector.*'.
1557 }
1558 if (Name.consume_front("experimental.stepvector.")) {
1559 Intrinsic::ID ID = Intrinsic::stepvector;
1560 rename(F);
1562 F->getParent(), ID, F->getFunctionType()->getReturnType());
1563 return true;
1564 }
1565 break; // No other 'e*'.
1566 case 'f':
1567 if (Name.starts_with("flt.rounds")) {
1568 rename(F);
1569 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1570 Intrinsic::get_rounding);
1571 return true;
1572 }
1573 break;
1574 case 'i':
1575 if (Name.starts_with("invariant.group.barrier")) {
1576 // Rename invariant.group.barrier to launder.invariant.group
1577 auto Args = F->getFunctionType()->params();
1578 Type* ObjectPtr[1] = {Args[0]};
1579 rename(F);
1581 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1582 return true;
1583 }
1584 break;
1585 case 'l': {
1586 bool IsLifetimeStart = Name.consume_front("lifetime.start");
1587 bool IsLifetimeEnd = !IsLifetimeStart && Name.consume_front("lifetime.end");
1588 if (IsLifetimeStart || IsLifetimeEnd) {
1589 if (F->arg_size() == 2) {
1590 Intrinsic::ID IID = IsLifetimeStart ? Intrinsic::lifetime_start
1591 : Intrinsic::lifetime_end;
1592 rename(F);
1593 // Old 2 argument form of these intrinsics have [Size, Ptr] as
1594 // arguments. Use the Ptr argument to create new declaration.
1595 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1596 F->getArg(1)->getType());
1597 return true;
1598 } else if (F->arg_size() == 1 && Name == ".i64") {
1599 // Matches @llvm.lifetime.{start/end}.i64 which used to be created by
1600 // Autoupgrade prior to
1601 // https://github.com/llvm/llvm-project/pull/204601. This is an invalid
1602 // intrinsic with no expected calls. To allow auto-upgrade process to
1603 // delete such invalid intrinsic declaration, set NewFn = nullptr
1604 // and return true here. If there are actual calls to this intrinsic
1605 // (which is not expected), they will be deleted in
1606 // UpgradeIntrinsicCall.
1607 NewFn = nullptr;
1608 return true;
1609 }
1610 }
1611 break;
1612 }
1613 case 'm': {
1614 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1615 // alignment parameter to embedding the alignment as an attribute of
1616 // the pointer args.
1617 if (unsigned ID = StringSwitch<unsigned>(Name)
1618 .StartsWith("memcpy.", Intrinsic::memcpy)
1619 .StartsWith("memmove.", Intrinsic::memmove)
1620 .Default(0)) {
1621 if (F->arg_size() == 5) {
1622 rename(F);
1623 // Get the types of dest, src, and len
1624 ArrayRef<Type *> ParamTypes =
1625 F->getFunctionType()->params().slice(0, 3);
1626 NewFn =
1627 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1628 return true;
1629 }
1630 }
1631 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1632 rename(F);
1633 // Get the types of dest, and len
1634 const auto *FT = F->getFunctionType();
1635 Type *ParamTypes[2] = {
1636 FT->getParamType(0), // Dest
1637 FT->getParamType(2) // len
1638 };
1639 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1640 Intrinsic::memset, ParamTypes);
1641 return true;
1642 }
1643
1644 unsigned MaskedID =
1646 .StartsWith("masked.load", Intrinsic::masked_load)
1647 .StartsWith("masked.gather", Intrinsic::masked_gather)
1648 .StartsWith("masked.store", Intrinsic::masked_store)
1649 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1650 .Default(0);
1651 if (MaskedID && F->arg_size() == 4) {
1652 rename(F);
1653 if (MaskedID == Intrinsic::masked_load ||
1654 MaskedID == Intrinsic::masked_gather) {
1656 F->getParent(), MaskedID,
1657 {F->getReturnType(), F->getArg(0)->getType()});
1658 return true;
1659 }
1661 F->getParent(), MaskedID,
1662 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1663 return true;
1664 }
1665 break;
1666 }
1667 case 'n': {
1668 if (Name.consume_front("nvvm.")) {
1669 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1670 if (F->arg_size() == 1) {
1671 Intrinsic::ID IID =
1673 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1674 .Case("clz.i", Intrinsic::ctlz)
1675 .Case("popc.i", Intrinsic::ctpop)
1677 if (IID != Intrinsic::not_intrinsic) {
1678 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1679 {F->getReturnType()});
1680 return true;
1681 }
1682 } else if (F->arg_size() == 2) {
1683 Intrinsic::ID IID =
1685 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1686 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1687 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1688 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1690 if (IID != Intrinsic::not_intrinsic) {
1691 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1692 {F->getReturnType()});
1693 return true;
1694 }
1695 }
1696
1697 // Check for nvvm intrinsics that need a return type adjustment.
1698 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1700 if (IID != Intrinsic::not_intrinsic) {
1701 NewFn = nullptr;
1702 return true;
1703 }
1704 }
1705
1706 // Upgrade Distributed Shared Memory Intrinsics
1708 if (IID != Intrinsic::not_intrinsic) {
1709 rename(F);
1710 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1711 return true;
1712 }
1713
1714 // Upgrade TMA copy G2S Intrinsics
1716 if (IID != Intrinsic::not_intrinsic) {
1717 rename(F);
1718 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1719 return true;
1720 }
1721
1722 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1723 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1724 //
1725 // TODO: We could add lohi.i2d.
1726 bool Expand = false;
1727 if (Name.consume_front("abs."))
1728 // nvvm.abs.{i,ii}
1729 Expand =
1730 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1731 else if (Name.consume_front("fabs."))
1732 // nvvm.fabs.{f,ftz.f,d}
1733 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1734 else if (Name.consume_front("ex2.approx."))
1735 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1736 Expand =
1737 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1738 else if (Name.consume_front("atomic.load."))
1739 // nvvm.atomic.load.add.{f32,f64}.p
1740 // nvvm.atomic.load.{inc,dec}.32.p
1741 Expand = StringSwitch<bool>(Name)
1742 .StartsWith("add.f32.p", true)
1743 .StartsWith("add.f64.p", true)
1744 .StartsWith("inc.32.p", true)
1745 .StartsWith("dec.32.p", true)
1746 .Default(false);
1747 else if (Name.consume_front("atomic."))
1748 // nvvm.atomic.{add,exch,max,min,inc,dec,and,or,xor}.gen.{i,f}.{cta,sys}
1749 // nvvm.atomic.cas.gen.i.{cta,sys}
1750 Expand = StringSwitch<bool>(Name)
1751 .StartsWith("add.gen.", true)
1752 .StartsWith("exch.gen.", true)
1753 .StartsWith("max.gen.", true)
1754 .StartsWith("min.gen.", true)
1755 .StartsWith("inc.gen.", true)
1756 .StartsWith("dec.gen.", true)
1757 .StartsWith("and.gen.", true)
1758 .StartsWith("or.gen.", true)
1759 .StartsWith("xor.gen.", true)
1760 .StartsWith("cas.gen.", true)
1761 .Default(false);
1762 else if (Name.consume_front("bitcast."))
1763 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1764 Expand =
1765 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1766 else if (Name.consume_front("rotate."))
1767 // nvvm.rotate.{b32,b64,right.b64}
1768 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1769 else if (Name.consume_front("ptr.gen.to."))
1770 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1771 Expand = consumeNVVMPtrAddrSpace(Name);
1772 else if (Name.consume_front("ptr."))
1773 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1774 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1775 else if (Name.consume_front("ldg.global."))
1776 // nvvm.ldg.global.{i,p,f}
1777 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1778 Name.starts_with("p."));
1779 else
1780 Expand = StringSwitch<bool>(Name)
1781 .Case("barrier0", true)
1782 .Case("barrier.n", true)
1783 .Case("barrier.sync.cnt", true)
1784 .Case("barrier.sync", true)
1785 .Case("barrier", true)
1786 .Case("bar.sync", true)
1787 .Case("barrier0.popc", true)
1788 .Case("barrier0.and", true)
1789 .Case("barrier0.or", true)
1790 .Case("clz.ll", true)
1791 .Case("popc.ll", true)
1792 .Case("h2f", true)
1793 .Case("swap.lo.hi.b64", true)
1794 .Case("tanh.approx.f32", true)
1795 .Default(false);
1796
1797 if (Expand) {
1798 NewFn = nullptr;
1799 return true;
1800 }
1801 break; // No other 'nvvm.*'.
1802 }
1803 break;
1804 }
1805 case 'o':
1806 if (Name.starts_with("objectsize.")) {
1807 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1808 if (F->arg_size() == 2 || F->arg_size() == 3) {
1809 rename(F);
1810 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1811 Intrinsic::objectsize, Tys);
1812 return true;
1813 }
1814 }
1815 break;
1816
1817 case 'p':
1818 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1819 rename(F);
1821 F->getParent(), Intrinsic::ptr_annotation,
1822 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1823 return true;
1824 }
1825 break;
1826
1827 case 'r': {
1828 if (Name.consume_front("riscv.")) {
1831 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1832 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1833 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1834 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1837 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1838 rename(F);
1839 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1840 return true;
1841 }
1842 break; // No other applicable upgrades.
1843 }
1844
1846 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1847 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1850 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1851 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1852 rename(F);
1853 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1854 return true;
1855 }
1856 break; // No other applicable upgrades.
1857 }
1858
1860 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1861 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1862 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1863 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1864 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1865 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1868 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1869 rename(F);
1870 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1871 return true;
1872 }
1873 break; // No other applicable upgrades.
1874 }
1875
1876 // Replace llvm.riscv.clmul with llvm.clmul.
1877 if (Name == "clmul.i32" || Name == "clmul.i64") {
1879 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1880 return true;
1881 }
1882
1883 break; // No other 'riscv.*' intrinsics
1884 }
1885 } break;
1886
1887 case 's':
1888 if (Name == "stackprotectorcheck") {
1889 NewFn = nullptr;
1890 return true;
1891 }
1892 break;
1893
1894 case 't':
1895 if (Name == "thread.pointer") {
1897 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1898 return true;
1899 }
1900 break;
1901
1902 case 'v': {
1903 if (Name == "var.annotation" && F->arg_size() == 4) {
1904 rename(F);
1906 F->getParent(), Intrinsic::var_annotation,
1907 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1908 return true;
1909 }
1910 if (Name.consume_front("vector.splice")) {
1911 if (Name.starts_with(".left") || Name.starts_with(".right"))
1912 break;
1913 return true;
1914 }
1915 break;
1916 }
1917
1918 case 'w':
1919 if (Name.consume_front("wasm.")) {
1922 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1923 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1924 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1927 rename(F);
1928 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1929 F->getReturnType());
1930 return true;
1931 }
1932
1933 if (Name.consume_front("dot.i8x16.i7x16.")) {
1935 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1936 .Case("add.signed",
1937 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1940 rename(F);
1941 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1942 return true;
1943 }
1944 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1945 }
1946 break; // No other 'wasm.*'.
1947 }
1948 break;
1949
1950 case 'x':
1951 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1952 return true;
1953 }
1954
1955 auto *ST = dyn_cast<StructType>(F->getReturnType());
1956 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1957 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1958 // Replace return type with literal non-packed struct. Only do this for
1959 // intrinsics declared to return a struct, not for intrinsics with
1960 // overloaded return type, in which case the exact struct type will be
1961 // mangled into the name.
1962 if (Intrinsic::hasStructReturnType(F->getIntrinsicID())) {
1963 FunctionType *FT = F->getFunctionType();
1964 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1965 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1966 std::string Name = F->getName().str();
1967 rename(F);
1968 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1969 Name, F->getParent());
1970
1971 // The new function may also need remangling.
1972 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1973 NewFn = *Result;
1974 return true;
1975 }
1976 }
1977
1978 // Remangle our intrinsic since we upgrade the mangling
1980 if (Result != std::nullopt) {
1981 NewFn = *Result;
1982 return true;
1983 }
1984
1985 // This may not belong here. This function is effectively being overloaded
1986 // to both detect an intrinsic which needs upgrading, and to provide the
1987 // upgraded form of the intrinsic. We should perhaps have two separate
1988 // functions for this.
1990 return true;
1991
1992 return false;
1993}
1994
1996 bool CanUpgradeDebugIntrinsicsToRecords) {
1997 NewFn = nullptr;
1998 bool Upgraded =
1999 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
2000
2001 // Upgrade intrinsic attributes. This does not change the function.
2002 if (NewFn)
2003 F = NewFn;
2004 if (Intrinsic::ID id = F->getIntrinsicID()) {
2005 // Only do this if the intrinsic signature is valid.
2006 SmallVector<Type *> OverloadTys;
2007 if (Intrinsic::isSignatureValid(id, F->getFunctionType(), OverloadTys))
2008 F->setAttributes(
2009 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
2010 }
2011 return Upgraded;
2012}
2013
2015 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
2016 GV->getName() == "llvm.global_dtors")) ||
2017 !GV->hasInitializer())
2018 return nullptr;
2020 if (!ATy)
2021 return nullptr;
2023 if (!STy || STy->getNumElements() != 2)
2024 return nullptr;
2025
2026 LLVMContext &C = GV->getContext();
2027 IRBuilder<> IRB(C);
2028 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
2029 IRB.getPtrTy());
2030 Constant *Init = GV->getInitializer();
2031 unsigned N = Init->getNumOperands();
2032 std::vector<Constant *> NewCtors(N);
2033 for (unsigned i = 0; i != N; ++i) {
2034 auto Ctor = cast<Constant>(Init->getOperand(i));
2035 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
2036 Ctor->getAggregateElement(1),
2038 }
2039 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
2040
2041 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
2042 NewInit, GV->getName());
2043}
2044
2045// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
2046// to byte shuffles.
2048 unsigned Shift) {
2049 auto *ResultTy = cast<FixedVectorType>(Op->getType());
2050 unsigned NumElts = ResultTy->getNumElements() * 8;
2051
2052 // Bitcast from a 64-bit element type to a byte element type.
2053 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
2054 Op = Builder.CreateBitCast(Op, VecTy, "cast");
2055
2056 // We'll be shuffling in zeroes.
2057 Value *Res = Constant::getNullValue(VecTy);
2058
2059 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2060 // we'll just return the zero vector.
2061 if (Shift < 16) {
2062 int Idxs[64];
2063 // 256/512-bit version is split into 2/4 16-byte lanes.
2064 for (unsigned l = 0; l != NumElts; l += 16)
2065 for (unsigned i = 0; i != 16; ++i) {
2066 unsigned Idx = NumElts + i - Shift;
2067 if (Idx < NumElts)
2068 Idx -= NumElts - 16; // end of lane, switch operand.
2069 Idxs[l + i] = Idx + l;
2070 }
2071
2072 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
2073 }
2074
2075 // Bitcast back to a 64-bit element type.
2076 return Builder.CreateBitCast(Res, ResultTy, "cast");
2077}
2078
2079// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
2080// to byte shuffles.
2082 unsigned Shift) {
2083 auto *ResultTy = cast<FixedVectorType>(Op->getType());
2084 unsigned NumElts = ResultTy->getNumElements() * 8;
2085
2086 // Bitcast from a 64-bit element type to a byte element type.
2087 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
2088 Op = Builder.CreateBitCast(Op, VecTy, "cast");
2089
2090 // We'll be shuffling in zeroes.
2091 Value *Res = Constant::getNullValue(VecTy);
2092
2093 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2094 // we'll just return the zero vector.
2095 if (Shift < 16) {
2096 int Idxs[64];
2097 // 256/512-bit version is split into 2/4 16-byte lanes.
2098 for (unsigned l = 0; l != NumElts; l += 16)
2099 for (unsigned i = 0; i != 16; ++i) {
2100 unsigned Idx = i + Shift;
2101 if (Idx >= 16)
2102 Idx += NumElts - 16; // end of lane, switch operand.
2103 Idxs[l + i] = Idx + l;
2104 }
2105
2106 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
2107 }
2108
2109 // Bitcast back to a 64-bit element type.
2110 return Builder.CreateBitCast(Res, ResultTy, "cast");
2111}
2112
2113static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2114 unsigned NumElts) {
2115 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
2117 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
2118 Mask = Builder.CreateBitCast(Mask, MaskTy);
2119
2120 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2121 // i8 and we need to extract down to the right number of elements.
2122 if (NumElts <= 4) {
2123 int Indices[4];
2124 for (unsigned i = 0; i != NumElts; ++i)
2125 Indices[i] = i;
2126 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2127 "extract");
2128 }
2129
2130 return Mask;
2131}
2132
2133static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2134 Value *Op1) {
2135 // If the mask is all ones just emit the first operation.
2136 if (const auto *C = dyn_cast<Constant>(Mask))
2137 if (C->isAllOnesValue())
2138 return Op0;
2139
2140 Mask = getX86MaskVec(Builder, Mask,
2141 cast<FixedVectorType>(Op0->getType())->getNumElements());
2142 return Builder.CreateSelect(Mask, Op0, Op1);
2143}
2144
2145static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2146 Value *Op1) {
2147 // If the mask is all ones just emit the first operation.
2148 if (const auto *C = dyn_cast<Constant>(Mask))
2149 if (C->isAllOnesValue())
2150 return Op0;
2151
2152 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2153 Mask->getType()->getIntegerBitWidth());
2154 Mask = Builder.CreateBitCast(Mask, MaskTy);
2155 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2156 return Builder.CreateSelect(Mask, Op0, Op1);
2157}
2158
2159// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2160// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2161// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
2163 Value *Op1, Value *Shift,
2164 Value *Passthru, Value *Mask,
2165 bool IsVALIGN) {
2166 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2167
2168 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2169 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2170 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2171 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2172
2173 // Mask the immediate for VALIGN.
2174 if (IsVALIGN)
2175 ShiftVal &= (NumElts - 1);
2176
2177 // If palignr is shifting the pair of vectors more than the size of two
2178 // lanes, emit zero.
2179 if (ShiftVal >= 32)
2181
2182 // If palignr is shifting the pair of input vectors more than one lane,
2183 // but less than two lanes, convert to shifting in zeroes.
2184 if (ShiftVal > 16) {
2185 ShiftVal -= 16;
2186 Op1 = Op0;
2188 }
2189
2190 int Indices[64];
2191 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2192 for (unsigned l = 0; l < NumElts; l += 16) {
2193 for (unsigned i = 0; i != 16; ++i) {
2194 unsigned Idx = ShiftVal + i;
2195 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2196 Idx += NumElts - 16; // End of lane, switch operand.
2197 Indices[l + i] = Idx + l;
2198 }
2199 }
2200
2201 Value *Align = Builder.CreateShuffleVector(
2202 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2203
2204 return emitX86Select(Builder, Mask, Align, Passthru);
2205}
2206
2208 bool ZeroMask, bool IndexForm) {
2209 Type *Ty = CI.getType();
2210 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2211 unsigned EltWidth = Ty->getScalarSizeInBits();
2212 bool IsFloat = Ty->isFPOrFPVectorTy();
2213 Intrinsic::ID IID;
2214 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2215 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2216 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2217 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2218 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2219 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2220 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2221 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2222 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2223 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2224 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2225 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2226 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2227 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2228 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2229 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2230 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2231 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2232 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2233 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2234 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2235 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2236 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2237 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2238 else if (VecWidth == 128 && EltWidth == 16)
2239 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2240 else if (VecWidth == 256 && EltWidth == 16)
2241 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2242 else if (VecWidth == 512 && EltWidth == 16)
2243 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2244 else if (VecWidth == 128 && EltWidth == 8)
2245 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2246 else if (VecWidth == 256 && EltWidth == 8)
2247 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2248 else if (VecWidth == 512 && EltWidth == 8)
2249 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2250 else
2251 llvm_unreachable("Unexpected intrinsic");
2252
2253 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2254 CI.getArgOperand(2) };
2255
2256 // If this isn't index form we need to swap operand 0 and 1.
2257 if (!IndexForm)
2258 std::swap(Args[0], Args[1]);
2259
2260 Value *V = Builder.CreateIntrinsic(IID, Args);
2261 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2262 : Builder.CreateBitCast(CI.getArgOperand(1),
2263 Ty);
2264 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2265}
2266
2268 Intrinsic::ID IID) {
2269 Type *Ty = CI.getType();
2270 Value *Op0 = CI.getOperand(0);
2271 Value *Op1 = CI.getOperand(1);
2272 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2273
2274 if (CI.arg_size() == 4) { // For masked intrinsics.
2275 Value *VecSrc = CI.getOperand(2);
2276 Value *Mask = CI.getOperand(3);
2277 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2278 }
2279 return Res;
2280}
2281
2283 bool IsRotateRight) {
2284 Type *Ty = CI.getType();
2285 Value *Src = CI.getArgOperand(0);
2286 Value *Amt = CI.getArgOperand(1);
2287
2288 // Amount may be scalar immediate, in which case create a splat vector.
2289 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2290 // we only care about the lowest log2 bits anyway.
2291 if (Amt->getType() != Ty) {
2292 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2293 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2294 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2295 }
2296
2297 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2298 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2299
2300 if (CI.arg_size() == 4) { // For masked intrinsics.
2301 Value *VecSrc = CI.getOperand(2);
2302 Value *Mask = CI.getOperand(3);
2303 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2304 }
2305 return Res;
2306}
2307
2308static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2309 bool IsSigned) {
2310 Type *Ty = CI.getType();
2311 Value *LHS = CI.getArgOperand(0);
2312 Value *RHS = CI.getArgOperand(1);
2313
2314 CmpInst::Predicate Pred;
2315 switch (Imm) {
2316 case 0x0:
2317 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2318 break;
2319 case 0x1:
2320 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2321 break;
2322 case 0x2:
2323 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2324 break;
2325 case 0x3:
2326 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2327 break;
2328 case 0x4:
2329 Pred = ICmpInst::ICMP_EQ;
2330 break;
2331 case 0x5:
2332 Pred = ICmpInst::ICMP_NE;
2333 break;
2334 case 0x6:
2335 return Constant::getNullValue(Ty); // FALSE
2336 case 0x7:
2337 return Constant::getAllOnesValue(Ty); // TRUE
2338 default:
2339 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2340 }
2341
2342 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2343 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2344 return Ext;
2345}
2346
2348 bool IsShiftRight, bool ZeroMask) {
2349 Type *Ty = CI.getType();
2350 Value *Op0 = CI.getArgOperand(0);
2351 Value *Op1 = CI.getArgOperand(1);
2352 Value *Amt = CI.getArgOperand(2);
2353
2354 if (IsShiftRight)
2355 std::swap(Op0, Op1);
2356
2357 // Amount may be scalar immediate, in which case create a splat vector.
2358 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2359 // we only care about the lowest log2 bits anyway.
2360 if (Amt->getType() != Ty) {
2361 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2362 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2363 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2364 }
2365
2366 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2367 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2368
2369 unsigned NumArgs = CI.arg_size();
2370 if (NumArgs >= 4) { // For masked intrinsics.
2371 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2372 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2373 CI.getArgOperand(0);
2374 Value *Mask = CI.getOperand(NumArgs - 1);
2375 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2376 }
2377 return Res;
2378}
2379
2381 Value *Mask, bool Aligned) {
2382 const Align Alignment =
2383 Aligned
2384 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2385 : Align(1);
2386
2387 // If the mask is all ones just emit a regular store.
2388 if (const auto *C = dyn_cast<Constant>(Mask))
2389 if (C->isAllOnesValue())
2390 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2391
2392 // Convert the mask from an integer type to a vector of i1.
2393 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2394 Mask = getX86MaskVec(Builder, Mask, NumElts);
2395 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2396}
2397
2399 Value *Passthru, Value *Mask, bool Aligned) {
2400 Type *ValTy = Passthru->getType();
2401 const Align Alignment =
2402 Aligned
2403 ? Align(
2405 8)
2406 : Align(1);
2407
2408 // If the mask is all ones just emit a regular store.
2409 if (const auto *C = dyn_cast<Constant>(Mask))
2410 if (C->isAllOnesValue())
2411 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2412
2413 // Convert the mask from an integer type to a vector of i1.
2414 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2415 Mask = getX86MaskVec(Builder, Mask, NumElts);
2416 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2417}
2418
2419static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2420 Type *Ty = CI.getType();
2421 Value *Op0 = CI.getArgOperand(0);
2422 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2423 {Op0, Builder.getInt1(false)});
2424 if (CI.arg_size() == 3)
2425 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2426 return Res;
2427}
2428
2429static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2430 Type *Ty = CI.getType();
2431
2432 // Arguments have a vXi32 type so cast to vXi64.
2433 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2434 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2435
2436 if (IsSigned) {
2437 // Shift left then arithmetic shift right.
2438 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2439 LHS = Builder.CreateShl(LHS, ShiftAmt);
2440 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2441 RHS = Builder.CreateShl(RHS, ShiftAmt);
2442 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2443 } else {
2444 // Clear the upper bits.
2445 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2446 LHS = Builder.CreateAnd(LHS, Mask);
2447 RHS = Builder.CreateAnd(RHS, Mask);
2448 }
2449
2450 Value *Res = Builder.CreateMul(LHS, RHS);
2451
2452 if (CI.arg_size() == 4)
2453 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2454
2455 return Res;
2456}
2457
2458// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
2460 Value *Mask) {
2461 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2462 if (Mask) {
2463 const auto *C = dyn_cast<Constant>(Mask);
2464 if (!C || !C->isAllOnesValue())
2465 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2466 }
2467
2468 if (NumElts < 8) {
2469 int Indices[8];
2470 for (unsigned i = 0; i != NumElts; ++i)
2471 Indices[i] = i;
2472 for (unsigned i = NumElts; i != 8; ++i)
2473 Indices[i] = NumElts + i % NumElts;
2474 Vec = Builder.CreateShuffleVector(Vec,
2476 Indices);
2477 }
2478 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2479}
2480
2482 unsigned CC, bool Signed) {
2483 Value *Op0 = CI.getArgOperand(0);
2484 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2485
2486 Value *Cmp;
2487 if (CC == 3) {
2489 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2490 } else if (CC == 7) {
2492 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2493 } else {
2495 switch (CC) {
2496 default: llvm_unreachable("Unknown condition code");
2497 case 0: Pred = ICmpInst::ICMP_EQ; break;
2498 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2499 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2500 case 4: Pred = ICmpInst::ICMP_NE; break;
2501 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2502 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2503 }
2504 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2505 }
2506
2507 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2508
2509 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2510}
2511
2512// Replace a masked intrinsic with an older unmasked intrinsic.
2514 Intrinsic::ID IID) {
2515 Value *Rep =
2516 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2517 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2518}
2519
2521 Value* A = CI.getArgOperand(0);
2522 Value* B = CI.getArgOperand(1);
2523 Value* Src = CI.getArgOperand(2);
2524 Value* Mask = CI.getArgOperand(3);
2525
2526 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2527 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2528 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2529 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2530 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2531 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2532}
2533
2535 Value* Op = CI.getArgOperand(0);
2536 Type* ReturnOp = CI.getType();
2537 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2538 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2539 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2540}
2541
2542// Replace intrinsic with unmasked version and a select.
2544 CallBase &CI, Value *&Rep) {
2545 Name = Name.substr(12); // Remove avx512.mask.
2546
2547 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2548 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2549 Intrinsic::ID IID;
2550 if (Name.starts_with("max.p")) {
2551 if (VecWidth == 128 && EltWidth == 32)
2552 IID = Intrinsic::x86_sse_max_ps;
2553 else if (VecWidth == 128 && EltWidth == 64)
2554 IID = Intrinsic::x86_sse2_max_pd;
2555 else if (VecWidth == 256 && EltWidth == 32)
2556 IID = Intrinsic::x86_avx_max_ps_256;
2557 else if (VecWidth == 256 && EltWidth == 64)
2558 IID = Intrinsic::x86_avx_max_pd_256;
2559 else
2560 llvm_unreachable("Unexpected intrinsic");
2561 } else if (Name.starts_with("min.p")) {
2562 if (VecWidth == 128 && EltWidth == 32)
2563 IID = Intrinsic::x86_sse_min_ps;
2564 else if (VecWidth == 128 && EltWidth == 64)
2565 IID = Intrinsic::x86_sse2_min_pd;
2566 else if (VecWidth == 256 && EltWidth == 32)
2567 IID = Intrinsic::x86_avx_min_ps_256;
2568 else if (VecWidth == 256 && EltWidth == 64)
2569 IID = Intrinsic::x86_avx_min_pd_256;
2570 else
2571 llvm_unreachable("Unexpected intrinsic");
2572 } else if (Name.starts_with("pshuf.b.")) {
2573 if (VecWidth == 128)
2574 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2575 else if (VecWidth == 256)
2576 IID = Intrinsic::x86_avx2_pshuf_b;
2577 else if (VecWidth == 512)
2578 IID = Intrinsic::x86_avx512_pshuf_b_512;
2579 else
2580 llvm_unreachable("Unexpected intrinsic");
2581 } else if (Name.starts_with("pmul.hr.sw.")) {
2582 if (VecWidth == 128)
2583 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2584 else if (VecWidth == 256)
2585 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2586 else if (VecWidth == 512)
2587 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2588 else
2589 llvm_unreachable("Unexpected intrinsic");
2590 } else if (Name.starts_with("pmulh.w.")) {
2591 if (VecWidth == 128)
2592 IID = Intrinsic::x86_sse2_pmulh_w;
2593 else if (VecWidth == 256)
2594 IID = Intrinsic::x86_avx2_pmulh_w;
2595 else if (VecWidth == 512)
2596 IID = Intrinsic::x86_avx512_pmulh_w_512;
2597 else
2598 llvm_unreachable("Unexpected intrinsic");
2599 } else if (Name.starts_with("pmulhu.w.")) {
2600 if (VecWidth == 128)
2601 IID = Intrinsic::x86_sse2_pmulhu_w;
2602 else if (VecWidth == 256)
2603 IID = Intrinsic::x86_avx2_pmulhu_w;
2604 else if (VecWidth == 512)
2605 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2606 else
2607 llvm_unreachable("Unexpected intrinsic");
2608 } else if (Name.starts_with("pmaddw.d.")) {
2609 if (VecWidth == 128)
2610 IID = Intrinsic::x86_sse2_pmadd_wd;
2611 else if (VecWidth == 256)
2612 IID = Intrinsic::x86_avx2_pmadd_wd;
2613 else if (VecWidth == 512)
2614 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2615 else
2616 llvm_unreachable("Unexpected intrinsic");
2617 } else if (Name.starts_with("pmaddubs.w.")) {
2618 if (VecWidth == 128)
2619 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2620 else if (VecWidth == 256)
2621 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2622 else if (VecWidth == 512)
2623 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2624 else
2625 llvm_unreachable("Unexpected intrinsic");
2626 } else if (Name.starts_with("packsswb.")) {
2627 if (VecWidth == 128)
2628 IID = Intrinsic::x86_sse2_packsswb_128;
2629 else if (VecWidth == 256)
2630 IID = Intrinsic::x86_avx2_packsswb;
2631 else if (VecWidth == 512)
2632 IID = Intrinsic::x86_avx512_packsswb_512;
2633 else
2634 llvm_unreachable("Unexpected intrinsic");
2635 } else if (Name.starts_with("packssdw.")) {
2636 if (VecWidth == 128)
2637 IID = Intrinsic::x86_sse2_packssdw_128;
2638 else if (VecWidth == 256)
2639 IID = Intrinsic::x86_avx2_packssdw;
2640 else if (VecWidth == 512)
2641 IID = Intrinsic::x86_avx512_packssdw_512;
2642 else
2643 llvm_unreachable("Unexpected intrinsic");
2644 } else if (Name.starts_with("packuswb.")) {
2645 if (VecWidth == 128)
2646 IID = Intrinsic::x86_sse2_packuswb_128;
2647 else if (VecWidth == 256)
2648 IID = Intrinsic::x86_avx2_packuswb;
2649 else if (VecWidth == 512)
2650 IID = Intrinsic::x86_avx512_packuswb_512;
2651 else
2652 llvm_unreachable("Unexpected intrinsic");
2653 } else if (Name.starts_with("packusdw.")) {
2654 if (VecWidth == 128)
2655 IID = Intrinsic::x86_sse41_packusdw;
2656 else if (VecWidth == 256)
2657 IID = Intrinsic::x86_avx2_packusdw;
2658 else if (VecWidth == 512)
2659 IID = Intrinsic::x86_avx512_packusdw_512;
2660 else
2661 llvm_unreachable("Unexpected intrinsic");
2662 } else if (Name.starts_with("vpermilvar.")) {
2663 if (VecWidth == 128 && EltWidth == 32)
2664 IID = Intrinsic::x86_avx_vpermilvar_ps;
2665 else if (VecWidth == 128 && EltWidth == 64)
2666 IID = Intrinsic::x86_avx_vpermilvar_pd;
2667 else if (VecWidth == 256 && EltWidth == 32)
2668 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2669 else if (VecWidth == 256 && EltWidth == 64)
2670 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2671 else if (VecWidth == 512 && EltWidth == 32)
2672 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2673 else if (VecWidth == 512 && EltWidth == 64)
2674 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2675 else
2676 llvm_unreachable("Unexpected intrinsic");
2677 } else if (Name == "cvtpd2dq.256") {
2678 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2679 } else if (Name == "cvtpd2ps.256") {
2680 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2681 } else if (Name == "cvttpd2dq.256") {
2682 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2683 } else if (Name == "cvttps2dq.128") {
2684 IID = Intrinsic::x86_sse2_cvttps2dq;
2685 } else if (Name == "cvttps2dq.256") {
2686 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2687 } else if (Name.starts_with("permvar.")) {
2688 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2689 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2690 IID = Intrinsic::x86_avx2_permps;
2691 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2692 IID = Intrinsic::x86_avx2_permd;
2693 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2694 IID = Intrinsic::x86_avx512_permvar_df_256;
2695 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2696 IID = Intrinsic::x86_avx512_permvar_di_256;
2697 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2698 IID = Intrinsic::x86_avx512_permvar_sf_512;
2699 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2700 IID = Intrinsic::x86_avx512_permvar_si_512;
2701 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2702 IID = Intrinsic::x86_avx512_permvar_df_512;
2703 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2704 IID = Intrinsic::x86_avx512_permvar_di_512;
2705 else if (VecWidth == 128 && EltWidth == 16)
2706 IID = Intrinsic::x86_avx512_permvar_hi_128;
2707 else if (VecWidth == 256 && EltWidth == 16)
2708 IID = Intrinsic::x86_avx512_permvar_hi_256;
2709 else if (VecWidth == 512 && EltWidth == 16)
2710 IID = Intrinsic::x86_avx512_permvar_hi_512;
2711 else if (VecWidth == 128 && EltWidth == 8)
2712 IID = Intrinsic::x86_avx512_permvar_qi_128;
2713 else if (VecWidth == 256 && EltWidth == 8)
2714 IID = Intrinsic::x86_avx512_permvar_qi_256;
2715 else if (VecWidth == 512 && EltWidth == 8)
2716 IID = Intrinsic::x86_avx512_permvar_qi_512;
2717 else
2718 llvm_unreachable("Unexpected intrinsic");
2719 } else if (Name.starts_with("dbpsadbw.")) {
2720 if (VecWidth == 128)
2721 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2722 else if (VecWidth == 256)
2723 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2724 else if (VecWidth == 512)
2725 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2726 else
2727 llvm_unreachable("Unexpected intrinsic");
2728 } else if (Name.starts_with("pmultishift.qb.")) {
2729 if (VecWidth == 128)
2730 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2731 else if (VecWidth == 256)
2732 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2733 else if (VecWidth == 512)
2734 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2735 else
2736 llvm_unreachable("Unexpected intrinsic");
2737 } else if (Name.starts_with("conflict.")) {
2738 if (Name[9] == 'd' && VecWidth == 128)
2739 IID = Intrinsic::x86_avx512_conflict_d_128;
2740 else if (Name[9] == 'd' && VecWidth == 256)
2741 IID = Intrinsic::x86_avx512_conflict_d_256;
2742 else if (Name[9] == 'd' && VecWidth == 512)
2743 IID = Intrinsic::x86_avx512_conflict_d_512;
2744 else if (Name[9] == 'q' && VecWidth == 128)
2745 IID = Intrinsic::x86_avx512_conflict_q_128;
2746 else if (Name[9] == 'q' && VecWidth == 256)
2747 IID = Intrinsic::x86_avx512_conflict_q_256;
2748 else if (Name[9] == 'q' && VecWidth == 512)
2749 IID = Intrinsic::x86_avx512_conflict_q_512;
2750 else
2751 llvm_unreachable("Unexpected intrinsic");
2752 } else if (Name.starts_with("pavg.")) {
2753 if (Name[5] == 'b' && VecWidth == 128)
2754 IID = Intrinsic::x86_sse2_pavg_b;
2755 else if (Name[5] == 'b' && VecWidth == 256)
2756 IID = Intrinsic::x86_avx2_pavg_b;
2757 else if (Name[5] == 'b' && VecWidth == 512)
2758 IID = Intrinsic::x86_avx512_pavg_b_512;
2759 else if (Name[5] == 'w' && VecWidth == 128)
2760 IID = Intrinsic::x86_sse2_pavg_w;
2761 else if (Name[5] == 'w' && VecWidth == 256)
2762 IID = Intrinsic::x86_avx2_pavg_w;
2763 else if (Name[5] == 'w' && VecWidth == 512)
2764 IID = Intrinsic::x86_avx512_pavg_w_512;
2765 else
2766 llvm_unreachable("Unexpected intrinsic");
2767 } else
2768 return false;
2769
2770 SmallVector<Value *, 4> Args(CI.args());
2771 Args.pop_back();
2772 Args.pop_back();
2773 Rep = Builder.CreateIntrinsic(IID, Args);
2774 unsigned NumArgs = CI.arg_size();
2775 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2776 CI.getArgOperand(NumArgs - 2));
2777 return true;
2778}
2779
/// Upgrade the comment in a call to inline asm that represents an objc
/// retain/release marker.
///
/// When \p AsmStr begins with "mov\tfp", mentions
/// "objc_retainAutoreleaseReturnValue" and contains "# marker", the '#' of the
/// first "# marker" occurrence is rewritten to ';'. Every other string is left
/// untouched.
///
/// \param AsmStr the inline asm string, modified in place. It is dereferenced
/// unconditionally, so it must not be null.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // Anchor the prefix test at offset 0: rfind(..., 0) only ever inspects the
  // start of the string, whereas find(...) == 0 keeps searching the whole asm
  // string before it can report that the prefix is absent.
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  // Only the first marker is rewritten; exactly one character ('#') changes.
  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
2790
2792 Function *F, IRBuilder<> &Builder) {
2793 Value *Rep = nullptr;
2794
2795 if (Name == "abs.i" || Name == "abs.ll") {
2796 Value *Arg = CI->getArgOperand(0);
2797 Rep = Builder.CreateIntrinsic(Intrinsic::abs, {Arg->getType()},
2798 {Arg, Builder.getTrue()},
2799 /*FMFSource=*/nullptr, "abs");
2800 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2801 Type *Ty = (Name == "abs.bf16")
2802 ? Builder.getBFloatTy()
2803 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2804 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2805 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2806 Rep = Builder.CreateBitCast(Abs, CI->getType());
2807 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2808 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2809 : Intrinsic::nvvm_fabs;
2810 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2811 } else if (Name.consume_front("ex2.approx.")) {
2812 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2813 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2814 : Intrinsic::nvvm_ex2_approx;
2815 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2816 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2817 Name.starts_with("atomic.load.add.f64.p")) {
2818 Value *Ptr = CI->getArgOperand(0);
2819 Value *Val = CI->getArgOperand(1);
2820 Rep = Builder.CreateAtomicRMW(
2822 CI->getContext().getOrInsertSyncScopeID("device"));
2823 // The default scope for atomic.load.* intrinsics is device
2824 // (= gpu scope in ptx), but the default LLVM atomic scope is
2825 // "system"
2826 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2827 Name.starts_with("atomic.load.dec.32.p")) {
2828 Value *Ptr = CI->getArgOperand(0);
2829 Value *Val = CI->getArgOperand(1);
2830 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2832 Rep = Builder.CreateAtomicRMW(
2834 CI->getContext().getOrInsertSyncScopeID("device"));
2835 // See comment above.
2836 } else if (Name.starts_with("atomic.") && Name.contains(".gen.")) {
2837 // nvvm.atomic.{op}.gen.{i,f}.{cta,sys} -> atomicrmw / cmpxchg.
2838 StringRef Op = Name.substr(StringRef("atomic.").size());
2839 Value *Ptr = CI->getArgOperand(0);
2840 Value *Val = CI->getArgOperand(1);
2842 Op.contains(".cta.") ? "block" : "");
2843 if (Op.starts_with("cas.")) {
2844 Value *New = CI->getArgOperand(2);
2845 Value *Pair = Builder.CreateAtomicCmpXchg(
2846 Ptr, Val, New, MaybeAlign(), AtomicOrdering::Monotonic,
2848 Rep = Builder.CreateExtractValue(Pair, 0);
2849 } else {
2850 // Note we don't upgrade anything to AtomicRMWInst::UMin/UMax. This is
2851 // because we were actually missing those intrinsics!
2852 AtomicRMWInst::BinOp BinOp =
2854 .StartsWith("add.gen.f", AtomicRMWInst::FAdd)
2855 .StartsWith("add.gen.i", AtomicRMWInst::Add)
2866 "unexpected nvvm scoped atomic intrinsic");
2867 Rep = Builder.CreateAtomicRMW(BinOp, Ptr, Val, MaybeAlign(),
2869 }
2870 } else if (Name == "clz.ll") {
2871 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2872 Value *Arg = CI->getArgOperand(0);
2873 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2874 {Arg, Builder.getFalse()},
2875 /*FMFSource=*/nullptr, "ctlz");
2876 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2877 } else if (Name == "popc.ll") {
2878 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2879 // i64.
2880 Value *Arg = CI->getArgOperand(0);
2881 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2882 Arg, /*FMFSource=*/nullptr, "ctpop");
2883 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2884 } else if (Name == "h2f") {
2885 Value *Cast =
2886 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2887 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2888 } else if (Name.consume_front("bitcast.") &&
2889 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2890 Name == "d2ll")) {
2891 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2892 } else if (Name == "rotate.b32") {
2893 Value *Arg = CI->getOperand(0);
2894 Value *ShiftAmt = CI->getOperand(1);
2895 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2896 {Arg, Arg, ShiftAmt});
2897 } else if (Name == "rotate.b64") {
2898 Type *Int64Ty = Builder.getInt64Ty();
2899 Value *Arg = CI->getOperand(0);
2900 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2901 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2902 {Arg, Arg, ZExtShiftAmt});
2903 } else if (Name == "rotate.right.b64") {
2904 Type *Int64Ty = Builder.getInt64Ty();
2905 Value *Arg = CI->getOperand(0);
2906 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2907 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2908 {Arg, Arg, ZExtShiftAmt});
2909 } else if (Name == "swap.lo.hi.b64") {
2910 Type *Int64Ty = Builder.getInt64Ty();
2911 Value *Arg = CI->getOperand(0);
2912 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2913 {Arg, Arg, Builder.getInt64(32)});
2914 } else if ((Name.consume_front("ptr.gen.to.") &&
2915 consumeNVVMPtrAddrSpace(Name)) ||
2916 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2917 Name.starts_with(".to.gen"))) {
2918 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2919 } else if (Name.consume_front("ldg.global")) {
2920 Value *Ptr = CI->getArgOperand(0);
2921 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2922 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2923 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2924 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2925 MDNode *MD = MDNode::get(Builder.getContext(), {});
2926 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2927 return LD;
2928 } else if (Name == "tanh.approx.f32") {
2929 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2930 FastMathFlags FMF;
2931 FMF.setApproxFunc();
2932 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2933 FMF);
2934 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2935 Value *Arg =
2936 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2937 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2938 {}, {Arg});
2939 } else if (Name == "barrier") {
2940 Rep = Builder.CreateIntrinsic(
2941 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2942 {CI->getArgOperand(0), CI->getArgOperand(1)});
2943 } else if (Name == "barrier.sync") {
2944 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2945 {CI->getArgOperand(0)});
2946 } else if (Name == "barrier.sync.cnt") {
2947 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2948 {CI->getArgOperand(0), CI->getArgOperand(1)});
2949 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2950 Name == "barrier0.or") {
2951 Value *C = CI->getArgOperand(0);
2952 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2953
2954 Intrinsic::ID IID =
2956 .Case("barrier0.popc",
2957 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2958 .Case("barrier0.and",
2959 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2960 .Case("barrier0.or",
2961 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2962 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2963 Rep = Builder.CreateZExt(Bar, CI->getType());
2964 } else {
2966 if (IID != Intrinsic::not_intrinsic &&
2967 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2968 rename(F);
2969 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2971 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2972 Value *Arg = CI->getArgOperand(I);
2973 Type *OldType = Arg->getType();
2974 Type *NewType = NewFn->getArg(I)->getType();
2975 Args.push_back(
2976 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2977 ? Builder.CreateBitCast(Arg, NewType)
2978 : Arg);
2979 }
2980 Rep = Builder.CreateCall(NewFn, Args);
2981 if (F->getReturnType()->isIntegerTy())
2982 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2983 }
2984 }
2985
2986 return Rep;
2987}
2988
2990 IRBuilder<> &Builder) {
2991 LLVMContext &C = F->getContext();
2992 Value *Rep = nullptr;
2993
2994 if (Name.starts_with("sse4a.movnt.")) {
2996 Elts.push_back(
2997 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2998 MDNode *Node = MDNode::get(C, Elts);
2999
3000 Value *Arg0 = CI->getArgOperand(0);
3001 Value *Arg1 = CI->getArgOperand(1);
3002
3003 // Nontemporal (unaligned) store of the 0'th element of the float/double
3004 // vector.
3005 Value *Extract =
3006 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
3007
3008 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
3009 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
3010 } else if (Name.starts_with("avx.movnt.") ||
3011 Name.starts_with("avx512.storent.")) {
3013 Elts.push_back(
3014 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3015 MDNode *Node = MDNode::get(C, Elts);
3016
3017 Value *Arg0 = CI->getArgOperand(0);
3018 Value *Arg1 = CI->getArgOperand(1);
3019
3020 StoreInst *SI = Builder.CreateAlignedStore(
3021 Arg1, Arg0,
3023 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
3024 } else if (Name == "sse2.storel.dq") {
3025 Value *Arg0 = CI->getArgOperand(0);
3026 Value *Arg1 = CI->getArgOperand(1);
3027
3028 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3029 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3030 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
3031 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
3032 } else if (Name.starts_with("sse.storeu.") ||
3033 Name.starts_with("sse2.storeu.") ||
3034 Name.starts_with("avx.storeu.")) {
3035 Value *Arg0 = CI->getArgOperand(0);
3036 Value *Arg1 = CI->getArgOperand(1);
3037 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
3038 } else if (Name == "avx512.mask.store.ss") {
3039 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
3040 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3041 Mask, false);
3042 } else if (Name.starts_with("avx512.mask.store")) {
3043 // "avx512.mask.storeu." or "avx512.mask.store."
3044 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
3045 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3046 CI->getArgOperand(2), Aligned);
3047 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
3048 // Upgrade packed integer vector compare intrinsics to compare instructions.
3049 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
3050 bool CmpEq = Name[9] == 'e';
3051 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
3052 CI->getArgOperand(0), CI->getArgOperand(1));
3053 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
3054 } else if (Name.starts_with("avx512.broadcastm")) {
3055 Type *ExtTy = Type::getInt32Ty(C);
3056 if (CI->getOperand(0)->getType()->isIntegerTy(8))
3057 ExtTy = Type::getInt64Ty(C);
3058 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
3059 ExtTy->getPrimitiveSizeInBits();
3060 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
3061 Rep = Builder.CreateVectorSplat(NumElts, Rep);
3062 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
3063 Value *Vec = CI->getArgOperand(0);
3064 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
3065 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
3066 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
3067 } else if (Name.starts_with("avx.sqrt.p") ||
3068 Name.starts_with("sse2.sqrt.p") ||
3069 Name.starts_with("sse.sqrt.p")) {
3070 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
3071 {CI->getArgOperand(0)});
3072 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
3073 if (CI->arg_size() == 4 &&
3074 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3075 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3076 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
3077 : Intrinsic::x86_avx512_sqrt_pd_512;
3078
3079 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
3080 Rep = Builder.CreateIntrinsic(IID, Args);
3081 } else {
3082 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
3083 {CI->getArgOperand(0)});
3084 }
3085 Rep =
3086 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3087 } else if (Name.starts_with("avx512.ptestm") ||
3088 Name.starts_with("avx512.ptestnm")) {
3089 Value *Op0 = CI->getArgOperand(0);
3090 Value *Op1 = CI->getArgOperand(1);
3091 Value *Mask = CI->getArgOperand(2);
3092 Rep = Builder.CreateAnd(Op0, Op1);
3093 llvm::Type *Ty = Op0->getType();
3095 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
3098 Rep = Builder.CreateICmp(Pred, Rep, Zero);
3099 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
3100 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
3101 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
3102 ->getNumElements();
3103 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
3104 Rep =
3105 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3106 } else if (Name.starts_with("avx512.kunpck")) {
3107 unsigned NumElts = CI->getType()->getScalarSizeInBits();
3108 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
3109 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
3110 int Indices[64];
3111 for (unsigned i = 0; i != NumElts; ++i)
3112 Indices[i] = i;
3113
3114 // First extract half of each vector. This gives better codegen than
3115 // doing it in a single shuffle.
3116 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
3117 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
3118 // Concat the vectors.
3119 // NOTE: Operands have to be swapped to match intrinsic definition.
3120 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
3121 Rep = Builder.CreateBitCast(Rep, CI->getType());
3122 } else if (Name == "avx512.kand.w") {
3123 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3124 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3125 Rep = Builder.CreateAnd(LHS, RHS);
3126 Rep = Builder.CreateBitCast(Rep, CI->getType());
3127 } else if (Name == "avx512.kandn.w") {
3128 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3129 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3130 LHS = Builder.CreateNot(LHS);
3131 Rep = Builder.CreateAnd(LHS, RHS);
3132 Rep = Builder.CreateBitCast(Rep, CI->getType());
3133 } else if (Name == "avx512.kor.w") {
3134 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3135 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3136 Rep = Builder.CreateOr(LHS, RHS);
3137 Rep = Builder.CreateBitCast(Rep, CI->getType());
3138 } else if (Name == "avx512.kxor.w") {
3139 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3140 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3141 Rep = Builder.CreateXor(LHS, RHS);
3142 Rep = Builder.CreateBitCast(Rep, CI->getType());
3143 } else if (Name == "avx512.kxnor.w") {
3144 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3145 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3146 LHS = Builder.CreateNot(LHS);
3147 Rep = Builder.CreateXor(LHS, RHS);
3148 Rep = Builder.CreateBitCast(Rep, CI->getType());
3149 } else if (Name == "avx512.knot.w") {
3150 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3151 Rep = Builder.CreateNot(Rep);
3152 Rep = Builder.CreateBitCast(Rep, CI->getType());
3153 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3154 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3155 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3156 Rep = Builder.CreateOr(LHS, RHS);
3157 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3158 Value *C;
3159 if (Name[14] == 'c')
3160 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
3161 else
3162 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3163 Rep = Builder.CreateICmpEQ(Rep, C);
3164 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3165 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3166 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3167 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3168 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3169 Type *I32Ty = Type::getInt32Ty(C);
3170 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3171 ConstantInt::get(I32Ty, 0));
3172 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3173 ConstantInt::get(I32Ty, 0));
3174 Value *EltOp;
3175 if (Name.contains(".add."))
3176 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3177 else if (Name.contains(".sub."))
3178 EltOp = Builder.CreateFSub(Elt0, Elt1);
3179 else if (Name.contains(".mul."))
3180 EltOp = Builder.CreateFMul(Elt0, Elt1);
3181 else
3182 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3183 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3184 ConstantInt::get(I32Ty, 0));
3185 } else if (Name.starts_with("avx512.mask.pcmp")) {
3186 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3187 bool CmpEq = Name[16] == 'e';
3188 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3189 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3190 Type *OpTy = CI->getArgOperand(0)->getType();
3191 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3192 Intrinsic::ID IID;
3193 switch (VecWidth) {
3194 default:
3195 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3196 break;
3197 case 128:
3198 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3199 break;
3200 case 256:
3201 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3202 break;
3203 case 512:
3204 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3205 break;
3206 }
3207
3208 Rep =
3209 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3210 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3211 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3212 Type *OpTy = CI->getArgOperand(0)->getType();
3213 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3214 unsigned EltWidth = OpTy->getScalarSizeInBits();
3215 Intrinsic::ID IID;
3216 if (VecWidth == 128 && EltWidth == 32)
3217 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3218 else if (VecWidth == 256 && EltWidth == 32)
3219 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3220 else if (VecWidth == 512 && EltWidth == 32)
3221 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3222 else if (VecWidth == 128 && EltWidth == 64)
3223 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3224 else if (VecWidth == 256 && EltWidth == 64)
3225 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3226 else if (VecWidth == 512 && EltWidth == 64)
3227 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3228 else
3229 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3230
3231 Rep =
3232 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3233 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3234 } else if (Name.starts_with("avx512.cmp.p")) {
3235 SmallVector<Value *, 4> Args(CI->args());
3236 Type *OpTy = Args[0]->getType();
3237 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3238 unsigned EltWidth = OpTy->getScalarSizeInBits();
3239 Intrinsic::ID IID;
3240 if (VecWidth == 128 && EltWidth == 32)
3241 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3242 else if (VecWidth == 256 && EltWidth == 32)
3243 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3244 else if (VecWidth == 512 && EltWidth == 32)
3245 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3246 else if (VecWidth == 128 && EltWidth == 64)
3247 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3248 else if (VecWidth == 256 && EltWidth == 64)
3249 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3250 else if (VecWidth == 512 && EltWidth == 64)
3251 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3252 else
3253 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3254
3256 if (VecWidth == 512)
3257 std::swap(Mask, Args.back());
3258 Args.push_back(Mask);
3259
3260 Rep = Builder.CreateIntrinsic(IID, Args);
3261 } else if (Name.starts_with("avx512.mask.cmp.")) {
3262 // Integer compare intrinsics.
3263 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3264 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3265 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3266 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3267 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3268 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3269 Name.starts_with("avx512.cvtw2mask.") ||
3270 Name.starts_with("avx512.cvtd2mask.") ||
3271 Name.starts_with("avx512.cvtq2mask.")) {
3272 Value *Op = CI->getArgOperand(0);
3273 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3274 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3275 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3276 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3277 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3278 Name.starts_with("avx512.mask.pabs")) {
3279 Rep = upgradeAbs(Builder, *CI);
3280 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3281 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3282 Name.starts_with("avx512.mask.pmaxs")) {
3283 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3284 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3285 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3286 Name.starts_with("avx512.mask.pmaxu")) {
3287 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3288 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3289 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3290 Name.starts_with("avx512.mask.pmins")) {
3291 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3292 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3293 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3294 Name.starts_with("avx512.mask.pminu")) {
3295 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3296 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3297 Name == "avx512.pmulu.dq.512" ||
3298 Name.starts_with("avx512.mask.pmulu.dq.")) {
3299 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3300 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3301 Name == "avx512.pmul.dq.512" ||
3302 Name.starts_with("avx512.mask.pmul.dq.")) {
3303 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3304 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3305 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3306 Rep =
3307 Builder.CreateSIToFP(CI->getArgOperand(1),
3308 cast<VectorType>(CI->getType())->getElementType());
3309 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3310 } else if (Name == "avx512.cvtusi2sd") {
3311 Rep =
3312 Builder.CreateUIToFP(CI->getArgOperand(1),
3313 cast<VectorType>(CI->getType())->getElementType());
3314 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3315 } else if (Name == "sse2.cvtss2sd") {
3316 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3317 Rep = Builder.CreateFPExt(
3318 Rep, cast<VectorType>(CI->getType())->getElementType());
3319 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3320 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3321 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3322 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3323 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3324 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3325 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3326 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3327 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3328 Name == "avx512.mask.cvtqq2ps.256" ||
3329 Name == "avx512.mask.cvtqq2ps.512" ||
3330 Name == "avx512.mask.cvtuqq2ps.256" ||
3331 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3332 Name == "avx.cvt.ps2.pd.256" ||
3333 Name == "avx512.mask.cvtps2pd.128" ||
3334 Name == "avx512.mask.cvtps2pd.256") {
3335 auto *DstTy = cast<FixedVectorType>(CI->getType());
3336 Rep = CI->getArgOperand(0);
3337 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3338
3339 unsigned NumDstElts = DstTy->getNumElements();
3340 if (NumDstElts < SrcTy->getNumElements()) {
3341 assert(NumDstElts == 2 && "Unexpected vector size");
3342 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3343 }
3344
3345 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3346 bool IsUnsigned = Name.contains("cvtu");
3347 if (IsPS2PD)
3348 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3349 else if (CI->arg_size() == 4 &&
3350 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3351 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3352 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3353 : Intrinsic::x86_avx512_sitofp_round;
3354 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3355 {Rep, CI->getArgOperand(3)});
3356 } else {
3357 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3358 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3359 }
3360
3361 if (CI->arg_size() >= 3)
3362 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3363 CI->getArgOperand(1));
3364 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3365 Name.starts_with("vcvtph2ps.")) {
3366 auto *DstTy = cast<FixedVectorType>(CI->getType());
3367 Rep = CI->getArgOperand(0);
3368 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3369 unsigned NumDstElts = DstTy->getNumElements();
3370 if (NumDstElts != SrcTy->getNumElements()) {
3371 assert(NumDstElts == 4 && "Unexpected vector size");
3372 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3373 }
3374 Rep = Builder.CreateBitCast(
3375 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3376 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3377 if (CI->arg_size() >= 3)
3378 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3379 CI->getArgOperand(1));
3380 } else if (Name.starts_with("avx512.mask.load")) {
3381 // "avx512.mask.loadu." or "avx512.mask.load."
3382 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3383 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3384 CI->getArgOperand(2), Aligned);
3385 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3386 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3387 auto *PtrTy = CI->getOperand(0)->getType();
3388 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3389 ResultTy->getNumElements());
3390 Rep = Builder.CreateIntrinsic(
3391 Intrinsic::masked_expandload, {ResultTy, PtrTy},
3392 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3393 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3394 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3395 auto *PtrTy = CI->getArgOperand(0)->getType();
3396 Value *MaskVec =
3397 getX86MaskVec(Builder, CI->getArgOperand(2),
3398 cast<FixedVectorType>(ResultTy)->getNumElements());
3399 Rep = Builder.CreateIntrinsic(
3400 Intrinsic::masked_compressstore, {ResultTy, PtrTy},
3401 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3402 } else if (Name.starts_with("avx512.mask.compress.") ||
3403 Name.starts_with("avx512.mask.expand.")) {
3404 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3405
3406 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3407 ResultTy->getNumElements());
3408
3409 bool IsCompress = Name[12] == 'c';
3410 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3411 : Intrinsic::x86_avx512_mask_expand;
3412 Rep = Builder.CreateIntrinsic(
3413 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3414 } else if (Name.starts_with("xop.vpcom")) {
3415 bool IsSigned;
3416 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3417 Name.ends_with("uq"))
3418 IsSigned = false;
3419 else if (Name.ends_with("b") || Name.ends_with("w") ||
3420 Name.ends_with("d") || Name.ends_with("q"))
3421 IsSigned = true;
3422 else
3423 reportFatalUsageErrorWithCI("Intrinsic has unknown suffix", CI);
3424
3425 unsigned Imm;
3426 if (CI->arg_size() == 3) {
3427 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3428 } else {
3429 Name = Name.substr(9); // strip off "xop.vpcom"
3430 if (Name.starts_with("lt"))
3431 Imm = 0;
3432 else if (Name.starts_with("le"))
3433 Imm = 1;
3434 else if (Name.starts_with("gt"))
3435 Imm = 2;
3436 else if (Name.starts_with("ge"))
3437 Imm = 3;
3438 else if (Name.starts_with("eq"))
3439 Imm = 4;
3440 else if (Name.starts_with("ne"))
3441 Imm = 5;
3442 else if (Name.starts_with("false"))
3443 Imm = 6;
3444 else if (Name.starts_with("true"))
3445 Imm = 7;
3446 else
3447 llvm_unreachable("Unknown condition");
3448 }
3449
3450 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3451 } else if (Name.starts_with("xop.vpcmov")) {
3452 Value *Sel = CI->getArgOperand(2);
3453 Value *NotSel = Builder.CreateNot(Sel);
3454 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3455 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3456 Rep = Builder.CreateOr(Sel0, Sel1);
3457 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3458 Name.starts_with("avx512.mask.prol")) {
3459 Rep = upgradeX86Rotate(Builder, *CI, false);
3460 } else if (Name.starts_with("avx512.pror") ||
3461 Name.starts_with("avx512.mask.pror")) {
3462 Rep = upgradeX86Rotate(Builder, *CI, true);
3463 } else if (Name.starts_with("avx512.vpshld.") ||
3464 Name.starts_with("avx512.mask.vpshld") ||
3465 Name.starts_with("avx512.maskz.vpshld")) {
3466 bool ZeroMask = Name[11] == 'z';
3467 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3468 } else if (Name.starts_with("avx512.vpshrd.") ||
3469 Name.starts_with("avx512.mask.vpshrd") ||
3470 Name.starts_with("avx512.maskz.vpshrd")) {
3471 bool ZeroMask = Name[11] == 'z';
3472 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3473 } else if (Name == "sse42.crc32.64.8") {
3474 Value *Trunc0 =
3475 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3476 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3477 {Trunc0, CI->getArgOperand(1)});
3478 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3479 } else if (Name.starts_with("avx.vbroadcast.s") ||
3480 Name.starts_with("avx512.vbroadcast.s")) {
3481 // Replace broadcasts with a series of insertelements.
3482 auto *VecTy = cast<FixedVectorType>(CI->getType());
3483 Type *EltTy = VecTy->getElementType();
3484 unsigned EltNum = VecTy->getNumElements();
3485 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3486 Type *I32Ty = Type::getInt32Ty(C);
3487 Rep = PoisonValue::get(VecTy);
3488 for (unsigned I = 0; I < EltNum; ++I)
3489 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3490 } else if (Name.starts_with("sse41.pmovsx") ||
3491 Name.starts_with("sse41.pmovzx") ||
3492 Name.starts_with("avx2.pmovsx") ||
3493 Name.starts_with("avx2.pmovzx") ||
3494 Name.starts_with("avx512.mask.pmovsx") ||
3495 Name.starts_with("avx512.mask.pmovzx")) {
3496 auto *DstTy = cast<FixedVectorType>(CI->getType());
3497 unsigned NumDstElts = DstTy->getNumElements();
3498
3499 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3500 SmallVector<int, 8> ShuffleMask(NumDstElts);
3501 for (unsigned i = 0; i != NumDstElts; ++i)
3502 ShuffleMask[i] = i;
3503
3504 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3505
3506 bool DoSext = Name.contains("pmovsx");
3507 Rep =
3508 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3509 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3510 if (CI->arg_size() == 3)
3511 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3512 CI->getArgOperand(1));
3513 } else if (Name == "avx512.mask.pmov.qd.256" ||
3514 Name == "avx512.mask.pmov.qd.512" ||
3515 Name == "avx512.mask.pmov.wb.256" ||
3516 Name == "avx512.mask.pmov.wb.512") {
3517 Type *Ty = CI->getArgOperand(1)->getType();
3518 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3519 Rep =
3520 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3521 } else if (Name.starts_with("avx.vbroadcastf128") ||
3522 Name == "avx2.vbroadcasti128") {
3523 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3524 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3525 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3526 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3527 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3528 if (NumSrcElts == 2)
3529 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3530 else
3531 Rep = Builder.CreateShuffleVector(Load,
3532 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3533 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3534 Name.starts_with("avx512.mask.shuf.f")) {
3535 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3536 Type *VT = CI->getType();
3537 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3538 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3539 unsigned ControlBitsMask = NumLanes - 1;
3540 unsigned NumControlBits = NumLanes / 2;
3541 SmallVector<int, 8> ShuffleMask(0);
3542
3543 for (unsigned l = 0; l != NumLanes; ++l) {
3544 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3545 // We actually need the other source.
3546 if (l >= NumLanes / 2)
3547 LaneMask += NumLanes;
3548 for (unsigned i = 0; i != NumElementsInLane; ++i)
3549 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3550 }
3551 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3552 CI->getArgOperand(1), ShuffleMask);
3553 Rep =
3554 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3555 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3556 Name.starts_with("avx512.mask.broadcasti")) {
3557 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3558 ->getNumElements();
3559 unsigned NumDstElts =
3560 cast<FixedVectorType>(CI->getType())->getNumElements();
3561
3562 SmallVector<int, 8> ShuffleMask(NumDstElts);
3563 for (unsigned i = 0; i != NumDstElts; ++i)
3564 ShuffleMask[i] = i % NumSrcElts;
3565
3566 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3567 CI->getArgOperand(0), ShuffleMask);
3568 Rep =
3569 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3570 } else if (Name.starts_with("avx2.pbroadcast") ||
3571 Name.starts_with("avx2.vbroadcast") ||
3572 Name.starts_with("avx512.pbroadcast") ||
3573 Name.starts_with("avx512.mask.broadcast.s")) {
3574 // Replace vp?broadcasts with a vector shuffle.
3575 Value *Op = CI->getArgOperand(0);
3576 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3577 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3580 Rep = Builder.CreateShuffleVector(Op, M);
3581
3582 if (CI->arg_size() == 3)
3583 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3584 CI->getArgOperand(1));
3585 } else if (Name.starts_with("sse2.padds.") ||
3586 Name.starts_with("avx2.padds.") ||
3587 Name.starts_with("avx512.padds.") ||
3588 Name.starts_with("avx512.mask.padds.")) {
3589 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3590 } else if (Name.starts_with("sse2.psubs.") ||
3591 Name.starts_with("avx2.psubs.") ||
3592 Name.starts_with("avx512.psubs.") ||
3593 Name.starts_with("avx512.mask.psubs.")) {
3594 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3595 } else if (Name.starts_with("sse2.paddus.") ||
3596 Name.starts_with("avx2.paddus.") ||
3597 Name.starts_with("avx512.mask.paddus.")) {
3598 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3599 } else if (Name.starts_with("sse2.psubus.") ||
3600 Name.starts_with("avx2.psubus.") ||
3601 Name.starts_with("avx512.mask.psubus.")) {
3602 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3603 } else if (Name.starts_with("avx512.mask.palignr.")) {
3604 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3605 CI->getArgOperand(1), CI->getArgOperand(2),
3606 CI->getArgOperand(3), CI->getArgOperand(4),
3607 false);
3608 } else if (Name.starts_with("avx512.mask.valign.")) {
3610 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3611 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3612 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3613 // 128/256-bit shift left specified in bits.
3614 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3615 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3616 Shift / 8); // Shift is in bits.
3617 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3618 // 128/256-bit shift right specified in bits.
3619 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3620 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3621 Shift / 8); // Shift is in bits.
3622 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3623 Name == "avx512.psll.dq.512") {
3624 // 128/256/512-bit shift left specified in bytes.
3625 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3626 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3627 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3628 Name == "avx512.psrl.dq.512") {
3629 // 128/256/512-bit shift right specified in bytes.
3630 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3631 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3632 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3633 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3634 Name.starts_with("avx2.pblendd.")) {
3635 Value *Op0 = CI->getArgOperand(0);
3636 Value *Op1 = CI->getArgOperand(1);
3637 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3638 auto *VecTy = cast<FixedVectorType>(CI->getType());
3639 unsigned NumElts = VecTy->getNumElements();
3640
3641 SmallVector<int, 16> Idxs(NumElts);
3642 for (unsigned i = 0; i != NumElts; ++i)
3643 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3644
3645 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3646 } else if (Name.starts_with("avx.vinsertf128.") ||
3647 Name == "avx2.vinserti128" ||
3648 Name.starts_with("avx512.mask.insert")) {
3649 Value *Op0 = CI->getArgOperand(0);
3650 Value *Op1 = CI->getArgOperand(1);
3651 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3652 unsigned DstNumElts =
3653 cast<FixedVectorType>(CI->getType())->getNumElements();
3654 unsigned SrcNumElts =
3655 cast<FixedVectorType>(Op1->getType())->getNumElements();
3656 unsigned Scale = DstNumElts / SrcNumElts;
3657
3658 // Mask off the high bits of the immediate value; hardware ignores those.
3659 Imm = Imm % Scale;
3660
3661 // Extend the second operand into a vector the size of the destination.
3662 SmallVector<int, 8> Idxs(DstNumElts);
3663 for (unsigned i = 0; i != SrcNumElts; ++i)
3664 Idxs[i] = i;
3665 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3666 Idxs[i] = SrcNumElts;
3667 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3668
3669 // Insert the second operand into the first operand.
3670
3671 // Note that there is no guarantee that instruction lowering will actually
3672 // produce a vinsertf128 instruction for the created shuffles. In
3673 // particular, the 0 immediate case involves no lane changes, so it can
3674 // be handled as a blend.
3675
3676 // Example of shuffle mask for 32-bit elements:
3677 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3678 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3679
3680 // First fill with identity mask.
3681 for (unsigned i = 0; i != DstNumElts; ++i)
3682 Idxs[i] = i;
3683 // Then replace the elements where we need to insert.
3684 for (unsigned i = 0; i != SrcNumElts; ++i)
3685 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3686 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3687
3688 // If the intrinsic has a mask operand, handle that.
3689 if (CI->arg_size() == 5)
3690 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3691 CI->getArgOperand(3));
3692 } else if (Name.starts_with("avx.vextractf128.") ||
3693 Name == "avx2.vextracti128" ||
3694 Name.starts_with("avx512.mask.vextract")) {
3695 Value *Op0 = CI->getArgOperand(0);
3696 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3697 unsigned DstNumElts =
3698 cast<FixedVectorType>(CI->getType())->getNumElements();
3699 unsigned SrcNumElts =
3700 cast<FixedVectorType>(Op0->getType())->getNumElements();
3701 unsigned Scale = SrcNumElts / DstNumElts;
3702
3703 // Mask off the high bits of the immediate value; hardware ignores those.
3704 Imm = Imm % Scale;
3705
3706 // Get indexes for the subvector of the input vector.
3707 SmallVector<int, 8> Idxs(DstNumElts);
3708 for (unsigned i = 0; i != DstNumElts; ++i) {
3709 Idxs[i] = i + (Imm * DstNumElts);
3710 }
3711 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3712
3713 // If the intrinsic has a mask operand, handle that.
3714 if (CI->arg_size() == 4)
3715 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3716 CI->getArgOperand(2));
3717 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3718 Name.starts_with("avx512.mask.perm.di.")) {
3719 Value *Op0 = CI->getArgOperand(0);
3720 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3721 auto *VecTy = cast<FixedVectorType>(CI->getType());
3722 unsigned NumElts = VecTy->getNumElements();
3723
3724 SmallVector<int, 8> Idxs(NumElts);
3725 for (unsigned i = 0; i != NumElts; ++i)
3726 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3727
3728 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3729
3730 if (CI->arg_size() == 4)
3731 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3732 CI->getArgOperand(2));
3733 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3734 // The immediate permute control byte looks like this:
3735 // [1:0] - select 128 bits from sources for low half of destination
3736 // [2] - ignore
3737 // [3] - zero low half of destination
3738 // [5:4] - select 128 bits from sources for high half of destination
3739 // [6] - ignore
3740 // [7] - zero high half of destination
3741
3742 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3743
3744 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3745 unsigned HalfSize = NumElts / 2;
3746 SmallVector<int, 8> ShuffleMask(NumElts);
3747
3748 // Determine which operand(s) are actually in use for this instruction.
3749 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3750 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3751
3752 // If needed, replace operands based on zero mask.
3753 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3754 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3755
3756 // Permute low half of result.
3757 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3758 for (unsigned i = 0; i < HalfSize; ++i)
3759 ShuffleMask[i] = StartIndex + i;
3760
3761 // Permute high half of result.
3762 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3763 for (unsigned i = 0; i < HalfSize; ++i)
3764 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3765
3766 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3767
3768 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3769 Name.starts_with("avx512.mask.vpermil.p") ||
3770 Name.starts_with("avx512.mask.pshuf.d.")) {
3771 Value *Op0 = CI->getArgOperand(0);
3772 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3773 auto *VecTy = cast<FixedVectorType>(CI->getType());
3774 unsigned NumElts = VecTy->getNumElements();
3775 // Calculate the size of each index in the immediate.
3776 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3777 unsigned IdxMask = ((1 << IdxSize) - 1);
3778
3779 SmallVector<int, 8> Idxs(NumElts);
3780 // Lookup the bits for this element, wrapping around the immediate every
3781 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3782 // to offset by the first index of each group.
3783 for (unsigned i = 0; i != NumElts; ++i)
3784 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3785
3786 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3787
3788 if (CI->arg_size() == 4)
3789 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3790 CI->getArgOperand(2));
3791 } else if (Name == "sse2.pshufl.w" ||
3792 Name.starts_with("avx512.mask.pshufl.w.")) {
3793 Value *Op0 = CI->getArgOperand(0);
3794 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3795 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3796
3797 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3798 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3799
3800 SmallVector<int, 16> Idxs(NumElts);
3801 for (unsigned l = 0; l != NumElts; l += 8) {
3802 for (unsigned i = 0; i != 4; ++i)
3803 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3804 for (unsigned i = 4; i != 8; ++i)
3805 Idxs[i + l] = i + l;
3806 }
3807
3808 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3809
3810 if (CI->arg_size() == 4)
3811 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3812 CI->getArgOperand(2));
3813 } else if (Name == "sse2.pshufh.w" ||
3814 Name.starts_with("avx512.mask.pshufh.w.")) {
3815 Value *Op0 = CI->getArgOperand(0);
3816 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3817 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3818
3819 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3820 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3821
3822 SmallVector<int, 16> Idxs(NumElts);
3823 for (unsigned l = 0; l != NumElts; l += 8) {
3824 for (unsigned i = 0; i != 4; ++i)
3825 Idxs[i + l] = i + l;
3826 for (unsigned i = 0; i != 4; ++i)
3827 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3828 }
3829
3830 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3831
3832 if (CI->arg_size() == 4)
3833 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3834 CI->getArgOperand(2));
3835 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3836 Value *Op0 = CI->getArgOperand(0);
3837 Value *Op1 = CI->getArgOperand(1);
3838 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3839 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3840
3841 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3842 unsigned HalfLaneElts = NumLaneElts / 2;
3843
3844 SmallVector<int, 16> Idxs(NumElts);
3845 for (unsigned i = 0; i != NumElts; ++i) {
3846 // Base index is the starting element of the lane.
3847 Idxs[i] = i - (i % NumLaneElts);
3848 // If we are half way through the lane switch to the other source.
3849 if ((i % NumLaneElts) >= HalfLaneElts)
3850 Idxs[i] += NumElts;
3851 // Now select the specific element by adding HalfLaneElts bits from
3852 // the immediate, wrapping around the immediate every 8 bits.
3853 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3854 }
3855
3856 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3857
3858 Rep =
3859 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3860 } else if (Name.starts_with("avx512.mask.movddup") ||
3861 Name.starts_with("avx512.mask.movshdup") ||
3862 Name.starts_with("avx512.mask.movsldup")) {
3863 Value *Op0 = CI->getArgOperand(0);
3864 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3865 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3866
3867 unsigned Offset = 0;
3868 if (Name.starts_with("avx512.mask.movshdup."))
3869 Offset = 1;
3870
3871 SmallVector<int, 16> Idxs(NumElts);
3872 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3873 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3874 Idxs[i + l + 0] = i + l + Offset;
3875 Idxs[i + l + 1] = i + l + Offset;
3876 }
3877
3878 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3879
3880 Rep =
3881 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3882 } else if (Name.starts_with("avx512.mask.punpckl") ||
3883 Name.starts_with("avx512.mask.unpckl.")) {
3884 Value *Op0 = CI->getArgOperand(0);
3885 Value *Op1 = CI->getArgOperand(1);
3886 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3887 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3888
3889 SmallVector<int, 64> Idxs(NumElts);
3890 for (int l = 0; l != NumElts; l += NumLaneElts)
3891 for (int i = 0; i != NumLaneElts; ++i)
3892 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3893
3894 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3895
3896 Rep =
3897 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3898 } else if (Name.starts_with("avx512.mask.punpckh") ||
3899 Name.starts_with("avx512.mask.unpckh.")) {
3900 Value *Op0 = CI->getArgOperand(0);
3901 Value *Op1 = CI->getArgOperand(1);
3902 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3903 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3904
3905 SmallVector<int, 64> Idxs(NumElts);
3906 for (int l = 0; l != NumElts; l += NumLaneElts)
3907 for (int i = 0; i != NumLaneElts; ++i)
3908 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3909
3910 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3911
3912 Rep =
3913 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3914 } else if (Name.starts_with("avx512.mask.and.") ||
3915 Name.starts_with("avx512.mask.pand.")) {
3916 VectorType *FTy = cast<VectorType>(CI->getType());
3918 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3919 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3920 Rep = Builder.CreateBitCast(Rep, FTy);
3921 Rep =
3922 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3923 } else if (Name.starts_with("avx512.mask.andn.") ||
3924 Name.starts_with("avx512.mask.pandn.")) {
3925 VectorType *FTy = cast<VectorType>(CI->getType());
3927 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3928 Rep = Builder.CreateAnd(Rep,
3929 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3930 Rep = Builder.CreateBitCast(Rep, FTy);
3931 Rep =
3932 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3933 } else if (Name.starts_with("avx512.mask.or.") ||
3934 Name.starts_with("avx512.mask.por.")) {
3935 VectorType *FTy = cast<VectorType>(CI->getType());
3937 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3938 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3939 Rep = Builder.CreateBitCast(Rep, FTy);
3940 Rep =
3941 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3942 } else if (Name.starts_with("avx512.mask.xor.") ||
3943 Name.starts_with("avx512.mask.pxor.")) {
3944 VectorType *FTy = cast<VectorType>(CI->getType());
3946 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3947 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3948 Rep = Builder.CreateBitCast(Rep, FTy);
3949 Rep =
3950 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3951 } else if (Name.starts_with("avx512.mask.padd.")) {
3952 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3953 Rep =
3954 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3955 } else if (Name.starts_with("avx512.mask.psub.")) {
3956 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3957 Rep =
3958 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3959 } else if (Name.starts_with("avx512.mask.pmull.")) {
3960 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3961 Rep =
3962 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3963 } else if (Name.starts_with("avx512.mask.add.p")) {
3964 if (Name.ends_with(".512")) {
3965 Intrinsic::ID IID;
3966 if (Name[17] == 's')
3967 IID = Intrinsic::x86_avx512_add_ps_512;
3968 else
3969 IID = Intrinsic::x86_avx512_add_pd_512;
3970
3971 Rep = Builder.CreateIntrinsic(
3972 IID,
3973 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3974 } else {
3975 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3976 }
3977 Rep =
3978 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3979 } else if (Name.starts_with("avx512.mask.div.p")) {
3980 if (Name.ends_with(".512")) {
3981 Intrinsic::ID IID;
3982 if (Name[17] == 's')
3983 IID = Intrinsic::x86_avx512_div_ps_512;
3984 else
3985 IID = Intrinsic::x86_avx512_div_pd_512;
3986
3987 Rep = Builder.CreateIntrinsic(
3988 IID,
3989 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3990 } else {
3991 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3992 }
3993 Rep =
3994 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3995 } else if (Name.starts_with("avx512.mask.mul.p")) {
3996 if (Name.ends_with(".512")) {
3997 Intrinsic::ID IID;
3998 if (Name[17] == 's')
3999 IID = Intrinsic::x86_avx512_mul_ps_512;
4000 else
4001 IID = Intrinsic::x86_avx512_mul_pd_512;
4002
4003 Rep = Builder.CreateIntrinsic(
4004 IID,
4005 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
4006 } else {
4007 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
4008 }
4009 Rep =
4010 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
4011 } else if (Name.starts_with("avx512.mask.sub.p")) {
4012 if (Name.ends_with(".512")) {
4013 Intrinsic::ID IID;
4014 if (Name[17] == 's')
4015 IID = Intrinsic::x86_avx512_sub_ps_512;
4016 else
4017 IID = Intrinsic::x86_avx512_sub_pd_512;
4018
4019 Rep = Builder.CreateIntrinsic(
4020 IID,
4021 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
4022 } else {
4023 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
4024 }
4025 Rep =
4026 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
4027 } else if ((Name.starts_with("avx512.mask.max.p") ||
4028 Name.starts_with("avx512.mask.min.p")) &&
4029 Name.drop_front(18) == ".512") {
4030 bool IsDouble = Name[17] == 'd';
4031 bool IsMin = Name[13] == 'i';
4032 static const Intrinsic::ID MinMaxTbl[2][2] = {
4033 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
4034 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
4035 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
4036
4037 Rep = Builder.CreateIntrinsic(
4038 IID,
4039 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
4040 Rep =
4041 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
4042 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
4043 Rep =
4044 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
4045 {CI->getArgOperand(0), Builder.getInt1(false)});
4046 Rep =
4047 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
4048 } else if (Name.starts_with("avx512.mask.psll")) {
4049 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4050 bool IsVariable = Name[16] == 'v';
4051 char Size = Name[16] == '.' ? Name[17]
4052 : Name[17] == '.' ? Name[18]
4053 : Name[18] == '.' ? Name[19]
4054 : Name[20];
4055
4056 Intrinsic::ID IID;
4057 if (IsVariable && Name[17] != '.') {
4058 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
4059 IID = Intrinsic::x86_avx2_psllv_q;
4060 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
4061 IID = Intrinsic::x86_avx2_psllv_q_256;
4062 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
4063 IID = Intrinsic::x86_avx2_psllv_d;
4064 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
4065 IID = Intrinsic::x86_avx2_psllv_d_256;
4066 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
4067 IID = Intrinsic::x86_avx512_psllv_w_128;
4068 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
4069 IID = Intrinsic::x86_avx512_psllv_w_256;
4070 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
4071 IID = Intrinsic::x86_avx512_psllv_w_512;
4072 else
4073 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4074 } else if (Name.ends_with(".128")) {
4075 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
4076 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
4077 : Intrinsic::x86_sse2_psll_d;
4078 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
4079 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
4080 : Intrinsic::x86_sse2_psll_q;
4081 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
4082 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
4083 : Intrinsic::x86_sse2_psll_w;
4084 else
4085 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4086 } else if (Name.ends_with(".256")) {
4087 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
4088 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
4089 : Intrinsic::x86_avx2_psll_d;
4090 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
4091 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
4092 : Intrinsic::x86_avx2_psll_q;
4093 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
4094 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
4095 : Intrinsic::x86_avx2_psll_w;
4096 else
4097 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4098 } else {
4099 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
4100 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
4101 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
4102 : Intrinsic::x86_avx512_psll_d_512;
4103 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
4104 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
4105 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
4106 : Intrinsic::x86_avx512_psll_q_512;
4107 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
4108 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
4109 : Intrinsic::x86_avx512_psll_w_512;
4110 else
4111 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4112 }
4113
4114 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4115 } else if (Name.starts_with("avx512.mask.psrl")) {
4116 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4117 bool IsVariable = Name[16] == 'v';
4118 char Size = Name[16] == '.' ? Name[17]
4119 : Name[17] == '.' ? Name[18]
4120 : Name[18] == '.' ? Name[19]
4121 : Name[20];
4122
4123 Intrinsic::ID IID;
4124 if (IsVariable && Name[17] != '.') {
4125 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
4126 IID = Intrinsic::x86_avx2_psrlv_q;
4127 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
4128 IID = Intrinsic::x86_avx2_psrlv_q_256;
4129 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
4130 IID = Intrinsic::x86_avx2_psrlv_d;
4131 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
4132 IID = Intrinsic::x86_avx2_psrlv_d_256;
4133 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
4134 IID = Intrinsic::x86_avx512_psrlv_w_128;
4135 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
4136 IID = Intrinsic::x86_avx512_psrlv_w_256;
4137 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
4138 IID = Intrinsic::x86_avx512_psrlv_w_512;
4139 else
4140 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4141 } else if (Name.ends_with(".128")) {
4142 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
4143 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4144 : Intrinsic::x86_sse2_psrl_d;
4145 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
4146 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4147 : Intrinsic::x86_sse2_psrl_q;
4148 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
4149 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4150 : Intrinsic::x86_sse2_psrl_w;
4151 else
4152 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4153 } else if (Name.ends_with(".256")) {
4154 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4155 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4156 : Intrinsic::x86_avx2_psrl_d;
4157 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4158 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4159 : Intrinsic::x86_avx2_psrl_q;
4160 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4161 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4162 : Intrinsic::x86_avx2_psrl_w;
4163 else
4164 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4165 } else {
4166 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4167 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4168 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4169 : Intrinsic::x86_avx512_psrl_d_512;
4170 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4171 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4172 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4173 : Intrinsic::x86_avx512_psrl_q_512;
4174 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
4175 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4176 : Intrinsic::x86_avx512_psrl_w_512;
4177 else
4178 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4179 }
4180
4181 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4182 } else if (Name.starts_with("avx512.mask.psra")) {
4183 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4184 bool IsVariable = Name[16] == 'v';
4185 char Size = Name[16] == '.' ? Name[17]
4186 : Name[17] == '.' ? Name[18]
4187 : Name[18] == '.' ? Name[19]
4188 : Name[20];
4189
4190 Intrinsic::ID IID;
4191 if (IsVariable && Name[17] != '.') {
4192 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4193 IID = Intrinsic::x86_avx2_psrav_d;
4194 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4195 IID = Intrinsic::x86_avx2_psrav_d_256;
4196 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4197 IID = Intrinsic::x86_avx512_psrav_w_128;
4198 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4199 IID = Intrinsic::x86_avx512_psrav_w_256;
4200 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4201 IID = Intrinsic::x86_avx512_psrav_w_512;
4202 else
4203 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4204 } else if (Name.ends_with(".128")) {
4205 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4206 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4207 : Intrinsic::x86_sse2_psra_d;
4208 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4209 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4210 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4211 : Intrinsic::x86_avx512_psra_q_128;
4212 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4213 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4214 : Intrinsic::x86_sse2_psra_w;
4215 else
4216 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4217 } else if (Name.ends_with(".256")) {
4218 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4219 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4220 : Intrinsic::x86_avx2_psra_d;
4221 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4222 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4223 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4224 : Intrinsic::x86_avx512_psra_q_256;
4225 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4226 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4227 : Intrinsic::x86_avx2_psra_w;
4228 else
4229 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4230 } else {
4231 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4232 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4233 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4234 : Intrinsic::x86_avx512_psra_d_512;
4235 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4236 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4237 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4238 : Intrinsic::x86_avx512_psra_q_512;
4239 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4240 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4241 : Intrinsic::x86_avx512_psra_w_512;
4242 else
4243 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4244 }
4245
4246 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4247 } else if (Name.starts_with("avx512.mask.move.s")) {
4248 Rep = upgradeMaskedMove(Builder, *CI);
4249 } else if (Name.starts_with("avx512.cvtmask2")) {
4250 Rep = upgradeMaskToInt(Builder, *CI);
4251 } else if (Name.ends_with(".movntdqa")) {
4253 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4254
4255 LoadInst *LI = Builder.CreateAlignedLoad(
4256 CI->getType(), CI->getArgOperand(0),
4258 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4259 Rep = LI;
4260 } else if (Name.starts_with("fma.vfmadd.") ||
4261 Name.starts_with("fma.vfmsub.") ||
4262 Name.starts_with("fma.vfnmadd.") ||
4263 Name.starts_with("fma.vfnmsub.")) {
4264 bool NegMul = Name[6] == 'n';
4265 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4266 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4267
4268 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4269 CI->getArgOperand(2)};
4270
4271 if (IsScalar) {
4272 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4273 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4274 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4275 }
4276
4277 if (NegMul && !IsScalar)
4278 Ops[0] = Builder.CreateFNeg(Ops[0]);
4279 if (NegMul && IsScalar)
4280 Ops[1] = Builder.CreateFNeg(Ops[1]);
4281 if (NegAcc)
4282 Ops[2] = Builder.CreateFNeg(Ops[2]);
4283
4284 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4285
4286 if (IsScalar)
4287 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4288 } else if (Name.starts_with("fma4.vfmadd.s")) {
4289 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4290 CI->getArgOperand(2)};
4291
4292 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4293 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4294 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4295
4296 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4297
4298 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4299 Rep, (uint64_t)0);
4300 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4301 Name.starts_with("avx512.maskz.vfmadd.s") ||
4302 Name.starts_with("avx512.mask3.vfmadd.s") ||
4303 Name.starts_with("avx512.mask3.vfmsub.s") ||
4304 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4305 bool IsMask3 = Name[11] == '3';
4306 bool IsMaskZ = Name[11] == 'z';
4307 // Drop the "avx512.mask." to make it easier.
4308 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4309 bool NegMul = Name[2] == 'n';
4310 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4311
4312 Value *A = CI->getArgOperand(0);
4313 Value *B = CI->getArgOperand(1);
4314 Value *C = CI->getArgOperand(2);
4315
4316 if (NegMul && (IsMask3 || IsMaskZ))
4317 A = Builder.CreateFNeg(A);
4318 if (NegMul && !(IsMask3 || IsMaskZ))
4319 B = Builder.CreateFNeg(B);
4320 if (NegAcc)
4321 C = Builder.CreateFNeg(C);
4322
4323 A = Builder.CreateExtractElement(A, (uint64_t)0);
4324 B = Builder.CreateExtractElement(B, (uint64_t)0);
4325 C = Builder.CreateExtractElement(C, (uint64_t)0);
4326
4327 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4328 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4329 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4330
4331 Intrinsic::ID IID;
4332 if (Name.back() == 'd')
4333 IID = Intrinsic::x86_avx512_vfmadd_f64;
4334 else
4335 IID = Intrinsic::x86_avx512_vfmadd_f32;
4336 Rep = Builder.CreateIntrinsic(IID, Ops);
4337 } else {
4338 Rep = Builder.CreateFMA(A, B, C);
4339 }
4340
4341 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4342 : IsMask3 ? C
4343 : A;
4344
4345 // For Mask3 with NegAcc, we need to create a new extractelement that
4346 // avoids the negation above.
4347 if (NegAcc && IsMask3)
4348 PassThru =
4349 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4350
4351 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4352 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4353 (uint64_t)0);
4354 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4355 Name.starts_with("avx512.mask.vfnmadd.p") ||
4356 Name.starts_with("avx512.mask.vfnmsub.p") ||
4357 Name.starts_with("avx512.mask3.vfmadd.p") ||
4358 Name.starts_with("avx512.mask3.vfmsub.p") ||
4359 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4360 Name.starts_with("avx512.maskz.vfmadd.p")) {
4361 bool IsMask3 = Name[11] == '3';
4362 bool IsMaskZ = Name[11] == 'z';
4363 // Drop the "avx512.mask." to make it easier.
4364 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4365 bool NegMul = Name[2] == 'n';
4366 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4367
4368 Value *A = CI->getArgOperand(0);
4369 Value *B = CI->getArgOperand(1);
4370 Value *C = CI->getArgOperand(2);
4371
4372 if (NegMul && (IsMask3 || IsMaskZ))
4373 A = Builder.CreateFNeg(A);
4374 if (NegMul && !(IsMask3 || IsMaskZ))
4375 B = Builder.CreateFNeg(B);
4376 if (NegAcc)
4377 C = Builder.CreateFNeg(C);
4378
4379 if (CI->arg_size() == 5 &&
4380 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4381 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4382 Intrinsic::ID IID;
4383 // Check the character before ".512" in string.
4384 if (Name[Name.size() - 5] == 's')
4385 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4386 else
4387 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4388
4389 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4390 } else {
4391 Rep = Builder.CreateFMA(A, B, C);
4392 }
4393
4394 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4395 : IsMask3 ? CI->getArgOperand(2)
4396 : CI->getArgOperand(0);
4397
4398 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4399 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4400 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4401 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4402 Intrinsic::ID IID;
4403 if (VecWidth == 128 && EltWidth == 32)
4404 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4405 else if (VecWidth == 256 && EltWidth == 32)
4406 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4407 else if (VecWidth == 128 && EltWidth == 64)
4408 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4409 else if (VecWidth == 256 && EltWidth == 64)
4410 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4411 else
4412 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4413
4414 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4415 CI->getArgOperand(2)};
4416 Ops[2] = Builder.CreateFNeg(Ops[2]);
4417 Rep = Builder.CreateIntrinsic(IID, Ops);
4418 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4419 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4420 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4421 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4422 bool IsMask3 = Name[11] == '3';
4423 bool IsMaskZ = Name[11] == 'z';
4424 // Drop the "avx512.mask." to make it easier.
4425 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4426 bool IsSubAdd = Name[3] == 's';
4427 if (CI->arg_size() == 5) {
4428 Intrinsic::ID IID;
4429 // Check the character before ".512" in string.
4430 if (Name[Name.size() - 5] == 's')
4431 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4432 else
4433 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4434
4435 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4436 CI->getArgOperand(2), CI->getArgOperand(4)};
4437 if (IsSubAdd)
4438 Ops[2] = Builder.CreateFNeg(Ops[2]);
4439
4440 Rep = Builder.CreateIntrinsic(IID, Ops);
4441 } else {
4442 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4443
4444 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4445 CI->getArgOperand(2)};
4446
4448 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4449 Value *Odd = Builder.CreateCall(FMA, Ops);
4450 Ops[2] = Builder.CreateFNeg(Ops[2]);
4451 Value *Even = Builder.CreateCall(FMA, Ops);
4452
4453 if (IsSubAdd)
4454 std::swap(Even, Odd);
4455
4456 SmallVector<int, 32> Idxs(NumElts);
4457 for (int i = 0; i != NumElts; ++i)
4458 Idxs[i] = i + (i % 2) * NumElts;
4459
4460 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4461 }
4462
4463 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4464 : IsMask3 ? CI->getArgOperand(2)
4465 : CI->getArgOperand(0);
4466
4467 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4468 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4469 Name.starts_with("avx512.maskz.pternlog.")) {
4470 bool ZeroMask = Name[11] == 'z';
4471 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4472 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4473 Intrinsic::ID IID;
4474 if (VecWidth == 128 && EltWidth == 32)
4475 IID = Intrinsic::x86_avx512_pternlog_d_128;
4476 else if (VecWidth == 256 && EltWidth == 32)
4477 IID = Intrinsic::x86_avx512_pternlog_d_256;
4478 else if (VecWidth == 512 && EltWidth == 32)
4479 IID = Intrinsic::x86_avx512_pternlog_d_512;
4480 else if (VecWidth == 128 && EltWidth == 64)
4481 IID = Intrinsic::x86_avx512_pternlog_q_128;
4482 else if (VecWidth == 256 && EltWidth == 64)
4483 IID = Intrinsic::x86_avx512_pternlog_q_256;
4484 else if (VecWidth == 512 && EltWidth == 64)
4485 IID = Intrinsic::x86_avx512_pternlog_q_512;
4486 else
4487 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4488
4489 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4490 CI->getArgOperand(2), CI->getArgOperand(3)};
4491 Rep = Builder.CreateIntrinsic(IID, Args);
4492 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4493 : CI->getArgOperand(0);
4494 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4495 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4496 Name.starts_with("avx512.maskz.vpmadd52")) {
4497 bool ZeroMask = Name[11] == 'z';
4498 bool High = Name[20] == 'h' || Name[21] == 'h';
4499 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4500 Intrinsic::ID IID;
4501 if (VecWidth == 128 && !High)
4502 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4503 else if (VecWidth == 256 && !High)
4504 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4505 else if (VecWidth == 512 && !High)
4506 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4507 else if (VecWidth == 128 && High)
4508 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4509 else if (VecWidth == 256 && High)
4510 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4511 else if (VecWidth == 512 && High)
4512 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4513 else
4514 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4515
4516 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4517 CI->getArgOperand(2)};
4518 Rep = Builder.CreateIntrinsic(IID, Args);
4519 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4520 : CI->getArgOperand(0);
4521 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4522 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4523 Name.starts_with("avx512.mask.vpermt2var.") ||
4524 Name.starts_with("avx512.maskz.vpermt2var.")) {
4525 bool ZeroMask = Name[11] == 'z';
4526 bool IndexForm = Name[17] == 'i';
4527 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4528 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4529 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4530 Name.starts_with("avx512.mask.vpdpbusds.") ||
4531 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4532 bool ZeroMask = Name[11] == 'z';
4533 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4534 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4535 Intrinsic::ID IID;
4536 if (VecWidth == 128 && !IsSaturating)
4537 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4538 else if (VecWidth == 256 && !IsSaturating)
4539 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4540 else if (VecWidth == 512 && !IsSaturating)
4541 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4542 else if (VecWidth == 128 && IsSaturating)
4543 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4544 else if (VecWidth == 256 && IsSaturating)
4545 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4546 else if (VecWidth == 512 && IsSaturating)
4547 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4548 else
4549 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4550
4551 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4552 CI->getArgOperand(2)};
4553
4554 // Input arguments types were incorrectly set to vectors of i32 before but
4555 // they should be vectors of i8. Insert bit cast when encountering the old
4556 // types
4557 if (Args[1]->getType()->isVectorTy() &&
4558 cast<VectorType>(Args[1]->getType())
4559 ->getElementType()
4560 ->isIntegerTy(32) &&
4561 Args[2]->getType()->isVectorTy() &&
4562 cast<VectorType>(Args[2]->getType())
4563 ->getElementType()
4564 ->isIntegerTy(32)) {
4565 Type *NewArgType = nullptr;
4566 if (VecWidth == 128)
4567 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4568 else if (VecWidth == 256)
4569 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4570 else if (VecWidth == 512)
4571 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4572 else
4573 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4574 CI);
4575
4576 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4577 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4578 }
4579
4580 Rep = Builder.CreateIntrinsic(IID, Args);
4581 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4582 : CI->getArgOperand(0);
4583 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4584 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4585 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4586 Name.starts_with("avx512.mask.vpdpwssds.") ||
4587 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4588 bool ZeroMask = Name[11] == 'z';
4589 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4590 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4591 Intrinsic::ID IID;
4592 if (VecWidth == 128 && !IsSaturating)
4593 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4594 else if (VecWidth == 256 && !IsSaturating)
4595 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4596 else if (VecWidth == 512 && !IsSaturating)
4597 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4598 else if (VecWidth == 128 && IsSaturating)
4599 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4600 else if (VecWidth == 256 && IsSaturating)
4601 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4602 else if (VecWidth == 512 && IsSaturating)
4603 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4604 else
4605 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4606
4607 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4608 CI->getArgOperand(2)};
4609
4610 // Input arguments types were incorrectly set to vectors of i32 before but
4611 // they should be vectors of i16. Insert bit cast when encountering the old
4612 // types
4613 if (Args[1]->getType()->isVectorTy() &&
4614 cast<VectorType>(Args[1]->getType())
4615 ->getElementType()
4616 ->isIntegerTy(32) &&
4617 Args[2]->getType()->isVectorTy() &&
4618 cast<VectorType>(Args[2]->getType())
4619 ->getElementType()
4620 ->isIntegerTy(32)) {
4621 Type *NewArgType = nullptr;
4622 if (VecWidth == 128)
4623 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4624 else if (VecWidth == 256)
4625 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4626 else if (VecWidth == 512)
4627 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4628 else
4629 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4630 CI);
4631
4632 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4633 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4634 }
4635
4636 Rep = Builder.CreateIntrinsic(IID, Args);
4637 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4638 : CI->getArgOperand(0);
4639 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4640 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4641 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4642 Name == "subborrow.u32" || Name == "subborrow.u64") {
4643 Intrinsic::ID IID;
4644 if (Name[0] == 'a' && Name.back() == '2')
4645 IID = Intrinsic::x86_addcarry_32;
4646 else if (Name[0] == 'a' && Name.back() == '4')
4647 IID = Intrinsic::x86_addcarry_64;
4648 else if (Name[0] == 's' && Name.back() == '2')
4649 IID = Intrinsic::x86_subborrow_32;
4650 else if (Name[0] == 's' && Name.back() == '4')
4651 IID = Intrinsic::x86_subborrow_64;
4652 else
4653 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4654
4655 // Make a call with 3 operands.
4656 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4657 CI->getArgOperand(2)};
4658 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4659
4660 // Extract the second result and store it.
4661 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4662 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4663 // Replace the original call result with the first result of the new call.
4664 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4665
4666 CI->replaceAllUsesWith(CF);
4667 Rep = nullptr;
4668 } else if (Name.starts_with("avx512.mask.") &&
4669 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4670 // Rep will be updated by the call in the condition.
4671 } else if (Name.starts_with("bmi.pdep.")) {
4672 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::pdep);
4673 } else if (Name.starts_with("bmi.pext.")) {
4674 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::pext);
4675 } else
4676 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4677
4678 return Rep;
4679}
4680
4682 Function *F, IRBuilder<> &Builder) {
4683 if (Name.starts_with("neon.bfcvt")) {
4684 if (Name.starts_with("neon.bfcvtn2")) {
4685 SmallVector<int, 32> LoMask(4);
4686 std::iota(LoMask.begin(), LoMask.end(), 0);
4687 SmallVector<int, 32> ConcatMask(8);
4688 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4689 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4690 Value *Trunc =
4691 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4692 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4693 } else if (Name.starts_with("neon.bfcvtn")) {
4694 SmallVector<int, 32> ConcatMask(8);
4695 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4696 Type *V4BF16 =
4697 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4698 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4699 dbgs() << "Trunc: " << *Trunc << "\n";
4700 return Builder.CreateShuffleVector(
4701 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4702 } else {
4703 return Builder.CreateFPTrunc(CI->getOperand(0),
4704 Type::getBFloatTy(F->getContext()));
4705 }
4706 } else if (Name.starts_with("sve.fcvt")) {
4707 Intrinsic::ID NewID =
4709 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4710 .Case("sve.fcvtnt.bf16f32",
4711 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4713 if (NewID == Intrinsic::not_intrinsic)
4714 llvm_unreachable("Unhandled Intrinsic!");
4715
4716 SmallVector<Value *, 3> Args(CI->args());
4717
4718 // The original intrinsics incorrectly used a predicate based on the
4719 // smallest element type rather than the largest.
4720 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4721 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4722
4723 if (Args[1]->getType() != BadPredTy)
4724 llvm_unreachable("Unexpected predicate type!");
4725
4726 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4727 BadPredTy, Args[1]);
4728 Args[1] = Builder.CreateIntrinsic(
4729 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4730
4731 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4732 CI->getName());
4733 }
4734
4735 if (Name == "neon.vcvtfp2hf")
4736 return Builder.CreateBitCast(
4737 Builder.CreateFPTrunc(
4738 CI->getOperand(0),
4739 FixedVectorType::get(Type::getHalfTy(F->getContext()), 4)),
4740 FixedVectorType::get(Type::getInt16Ty(F->getContext()), 4));
4741 if (Name == "neon.vcvthf2fp")
4742 return Builder.CreateFPExt(
4743 Builder.CreateBitCast(
4744 CI->getOperand(0),
4745 FixedVectorType::get(Type::getHalfTy(F->getContext()), 4)),
4746 FixedVectorType::get(Type::getFloatTy(F->getContext()), 4));
4747
4748 llvm_unreachable("Unhandled Intrinsic!");
4749}
4750
4752 IRBuilder<> &Builder) {
4753 if (Name == "mve.vctp64.old") {
4754 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4755 // correct type.
4756 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4757 CI->getArgOperand(0),
4758 /*FMFSource=*/nullptr, CI->getName());
4759 Value *C1 = Builder.CreateIntrinsic(
4760 Intrinsic::arm_mve_pred_v2i,
4761 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4762 return Builder.CreateIntrinsic(
4763 Intrinsic::arm_mve_pred_i2v,
4764 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4765 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4766 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4767 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4768 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4769 Name ==
4770 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4771 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4772 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4773 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4774 Name ==
4775 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4776 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4777 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4778 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4779 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4780 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4781 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4782 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4783 std::vector<Type *> Tys;
4784 unsigned ID = CI->getIntrinsicID();
4785 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4786 switch (ID) {
4787 case Intrinsic::arm_mve_mull_int_predicated:
4788 case Intrinsic::arm_mve_vqdmull_predicated:
4789 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4790 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4791 break;
4792 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4793 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4794 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4795 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4796 V2I1Ty};
4797 break;
4798 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4799 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4800 CI->getOperand(1)->getType(), V2I1Ty};
4801 break;
4802 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4803 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4804 CI->getOperand(2)->getType(), V2I1Ty};
4805 break;
4806 case Intrinsic::arm_cde_vcx1q_predicated:
4807 case Intrinsic::arm_cde_vcx1qa_predicated:
4808 case Intrinsic::arm_cde_vcx2q_predicated:
4809 case Intrinsic::arm_cde_vcx2qa_predicated:
4810 case Intrinsic::arm_cde_vcx3q_predicated:
4811 case Intrinsic::arm_cde_vcx3qa_predicated:
4812 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4813 break;
4814 default:
4815 llvm_unreachable("Unhandled Intrinsic!");
4816 }
4817
4818 std::vector<Value *> Ops;
4819 for (Value *Op : CI->args()) {
4820 Type *Ty = Op->getType();
4821 if (Ty->getScalarSizeInBits() == 1) {
4822 Value *C1 = Builder.CreateIntrinsic(
4823 Intrinsic::arm_mve_pred_v2i,
4824 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4825 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4826 }
4827 Ops.push_back(Op);
4828 }
4829
4830 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4831 CI->getName());
4832 }
4833 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4834}
4835
4836// These are expected to have the arguments:
4837// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4838//
4839// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4840//
4842 Function *F, IRBuilder<> &Builder) {
4843 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4844 // for compatibility.
4845 auto UpgradeLegacyWMMAIUIntrinsicCall =
4846 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4847 ArrayRef<Type *> OverloadTys) -> Value * {
4848 // Prepare arguments, append clamp=0 for compatibility
4849 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4850 Args.push_back(Builder.getFalse());
4851
4852 // Insert the declaration for the right overload types
4854 F->getParent(), F->getIntrinsicID(), OverloadTys);
4855
4856 // Copy operand bundles if any
4858 CI->getOperandBundlesAsDefs(Bundles);
4859
4860 // Create the new call and copy calling properties
4861 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4862 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4863 NewCall->setCallingConv(CI->getCallingConv());
4864 NewCall->setAttributes(CI->getAttributes());
4865 NewCall->setDebugLoc(CI->getDebugLoc());
4866 NewCall->copyMetadata(*CI);
4867 return NewCall;
4868 };
4869
4870 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4871 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4872 "intrinsic should have 7 arguments");
4873 Type *T1 = CI->getArgOperand(4)->getType();
4874 Type *T2 = CI->getArgOperand(1)->getType();
4875 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4876 }
4877 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4878 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4879 "intrinsic should have 8 arguments");
4880 Type *T1 = CI->getArgOperand(4)->getType();
4881 Type *T2 = CI->getArgOperand(1)->getType();
4882 Type *T3 = CI->getArgOperand(3)->getType();
4883 Type *T4 = CI->getArgOperand(5)->getType();
4884 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4885 }
4886
4887 switch (F->getIntrinsicID()) {
4888 default:
4889 break;
4890 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4891 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4892 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4893 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4894 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4895 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4896 // Drop src0 and src1 modifiers.
4897 const Value *Op0 = CI->getArgOperand(0);
4898 const Value *Op2 = CI->getArgOperand(2);
4899 assert(Op0->getType()->isIntegerTy() && Op2->getType()->isIntegerTy());
4900 const ConstantInt *ModA = dyn_cast<ConstantInt>(Op0);
4901 const ConstantInt *ModB = dyn_cast<ConstantInt>(Op2);
4902 if (!ModA->isZero() || !ModB->isZero())
4903 reportFatalUsageError(Name + " matrix A and B modifiers shall be zero");
4904
4906 for (int I = 4, E = CI->arg_size(); I < E; ++I)
4907 Args.push_back(CI->getArgOperand(I));
4908
4909 SmallVector<Type *, 3> Overloads{F->getReturnType(), Args[0]->getType()};
4910 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4911 Overloads.push_back(Args[3]->getType());
4913 F->getParent(), F->getIntrinsicID(), Overloads);
4914
4916 CI->getOperandBundlesAsDefs(Bundles);
4917
4918 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4919 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4920 NewCall->setCallingConv(CI->getCallingConv());
4921 NewCall->setAttributes(CI->getAttributes());
4922 NewCall->setDebugLoc(CI->getDebugLoc());
4923 NewCall->copyMetadata(*CI);
4924 NewCall->takeName(CI);
4925 return NewCall;
4926 }
4927 }
4928
4929 AtomicRMWInst::BinOp RMWOp =
4931 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4932 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4933 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4934 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4935 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4936 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4937 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4938 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4939 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4940 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4941 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4942 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4943 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4944
4945 unsigned NumOperands = CI->getNumOperands();
4946 if (NumOperands < 3) // Malformed bitcode.
4947 return nullptr;
4948
4949 Value *Ptr = CI->getArgOperand(0);
4950 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4951 if (!PtrTy) // Malformed.
4952 return nullptr;
4953
4954 Value *Val = CI->getArgOperand(1);
4955 if (Val->getType() != CI->getType()) // Malformed.
4956 return nullptr;
4957
4958 ConstantInt *OrderArg = nullptr;
4959 bool IsVolatile = false;
4960
4961 // These should have 5 arguments (plus the callee). A separate version of the
4962 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4963 if (NumOperands > 3)
4964 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4965
4966 // Ignore scope argument at 3
4967
4968 if (NumOperands > 5) {
4969 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4970 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4971 }
4972
4974 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4975 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4978
4979 LLVMContext &Ctx = F->getContext();
4980
4981 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4982 Type *RetTy = CI->getType();
4983 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4984 if (VT->getElementType()->isIntegerTy(16)) {
4985 VectorType *AsBF16 =
4986 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4987 Val = Builder.CreateBitCast(Val, AsBF16);
4988 }
4989 }
4990
4991 // The scope argument never really worked correctly. Use agent as the most
4992 // conservative option which should still always produce the instruction.
4993 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4994 AtomicRMWInst *RMW =
4995 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4996
4997 unsigned AddrSpace = PtrTy->getAddressSpace();
4998 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4999 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
5000 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
5001 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
5002 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
5003 }
5004
5005 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
5006 MDBuilder MDB(F->getContext());
5007 MDNode *RangeNotPrivate =
5010 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
5011 }
5012
5013 if (IsVolatile)
5014 RMW->setVolatile(true);
5015
5016 return Builder.CreateBitCast(RMW, RetTy);
5017}
5018
5019/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
5020/// plain MDNode, as it's the verifier's job to check these are the correct
5021/// types later.
///
/// \p CI - The intrinsic call whose operand is unwrapped.
/// \p Op - Index of the argument operand to unwrap.
/// Returns nullptr when \p Op is not a valid argument index of \p CI, or when
/// the operand does not yield an MDNode.
5022static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
 // Old intrinsic forms may carry fewer arguments, so an out-of-range index is
 // answered with nullptr instead of being treated as an error.
5023 if (Op < CI->arg_size()) {
5024 if (MetadataAsValue *MAV =
5026 Metadata *MD = MAV->getMetadata();
 // Null-tolerant cast: MD may be absent or may not be an MDNode at all.
5027 return dyn_cast_if_present<MDNode>(MD);
5028 }
5029 }
5030 return nullptr;
5031}
5032
5033/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
///
/// \p CI - The intrinsic call whose operand is unwrapped.
/// \p Op - Index of the argument operand to unwrap.
/// Returns nullptr when \p Op is not a valid argument index of \p CI. Unlike
/// unwrapMAVOp, the wrapped metadata is returned as-is rather than being
/// narrowed to an MDNode.
5034static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
5035 if (Op < CI->arg_size())
5037 return MAV->getMetadata();
5038 return nullptr;
5039}
5040
5041/// Convert debug intrinsic calls to non-instruction debug records.
5042/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
5043/// \p CI - The debug intrinsic call.
5045 DbgRecord *DR = nullptr;
5046 if (Name == "label") {
5048 } else if (Name == "assign") {
5051 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
5052 unwrapMAVMetadataOp(CI, 4),
5053 /*The address is a Value ref, it will be stored as a Metadata */
5054 unwrapMAVOp(CI, 5));
5055 } else if (Name == "declare") {
5058 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr);
5059 } else if (Name == "addr") {
5060 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
5061 MDNode *ExprNode = unwrapMAVOp(CI, 2);
5062 // Don't try to add something to the expression if it's not an expression.
5063 // Instead, allow the verifier to fail later.
 // unwrapMAVOp yields nullptr for a missing or non-MDNode operand, which plain
 // dyn_cast does not accept, so use the null-tolerant cast here.
5064 if (DIExpression *Expr = dyn_cast_if_present<DIExpression>(ExprNode)) {
5065 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
5066 }
5069 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr);
5070 } else if (Name == "value") {
5071 // An old version of dbg.value had an extra offset argument.
5072 unsigned VarOp = 1;
5073 unsigned ExprOp = 2;
5074 if (CI->arg_size() == 4) {
5076 // Nonzero offset dbg.values get dropped without a replacement.
5077 if (!Offset || !Offset->isNullValue())
5078 return;
5079 VarOp = 2;
5080 ExprOp = 3;
5081 }
5084 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
5085 nullptr);
5086 }
5087 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
 // DR is only dereferenced once the assert above has ruled out the case where
 // Name matched none of the intrinsic kinds and DR is still null.
5088 DR->setDebugLoc(CI->getDebugLoc());
5089 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
5090}
5091
5094 if (!Offset)
5095 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
 // The sign of the offset picks the replacement intrinsic: non-negative
 // offsets become vector_splice_left, negative ones vector_splice_right. The
 // two vector operands are forwarded unchanged and the offset magnitude is
 // passed as an i32 third operand.
 // NOTE(review): std::abs(OffsetVal) is a 64-bit value handed to getInt32 --
 // confirm that offsets always fit in 32 bits.
5096 int64_t OffsetVal = Offset->getSExtValue();
5097 return Builder.CreateIntrinsic(OffsetVal >= 0
5098 ? Intrinsic::vector_splice_left
5099 : Intrinsic::vector_splice_right,
5100 CI->getType(),
5101 {CI->getArgOperand(0), CI->getArgOperand(1),
5102 Builder.getInt32(std::abs(OffsetVal))});
5103}
5104
5106 Function *F, IRBuilder<> &Builder) {
 // Names starting with "to.fp16": truncate the operand to half, then bitcast
 // that half value to the type the old call returned.
5107 if (Name.starts_with("to.fp16")) {
5108 Value *Cast =
5109 Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
5110 return Builder.CreateBitCast(Cast, CI->getType());
5111 }
5112
 // Names starting with "from.fp16": bitcast the operand to half, then extend
 // it to the type the old call returned.
5113 if (Name.starts_with("from.fp16")) {
5114 Value *Cast =
5115 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
5116 return Builder.CreateFPExt(Cast, CI->getType());
5117 }
5118
 // Any other name is not handled here; no replacement value is produced.
5119 return nullptr;
5120}
5121
5123 IRBuilder<> &Builder) {
 // Returns true only when CI has been replaced by a new call and erased.
 // Every `return false` below happens before CI is modified, so the caller
 // can still apply its own fallback handling.
5124 Intrinsic::ID IID = NewFn->getIntrinsicID();
5125
 // Without any default values for this intrinsic there is nothing to fill in.
5126 auto [FirstDefault, Defaults] = Intrinsic::getAllDefaultArgValues(IID);
5127 if (Defaults.empty())
5128 return false;
5129
5130 unsigned OldArgCount = CI->arg_size();
5131 unsigned NewArgCount = NewFn->arg_size();
5132
5133 // If the caller already supplied all arguments (or more), nothing to do.
5134 // This mirrors C++ semantics: an explicitly-passed value is never overridden.
5135 if (OldArgCount >= NewArgCount)
5136 return false;
5137
5138 // Start with the existing arguments from the old call.
5139 SmallVector<Value *, 8> NewArgs(CI->args());
5140
5141 // Defaults are a contiguous trailing block, so checking the first missing
5142 // argument is enough.
5143 if (OldArgCount < FirstDefault)
5144 return false;
5145
5146 // Fill in each missing trailing argument from the table.
5147 FunctionType *NewFT = NewFn->getFunctionType();
5148 for (unsigned Idx = OldArgCount; Idx < NewArgCount; ++Idx) {
5149 assert(Idx >= FirstDefault && Idx - FirstDefault < Defaults.size() &&
5150 "missing argument outside the default range");
5151 Type *ParamTy = NewFT->getParamType(Idx);
5152
5153 // Only integer types are supported (i1, i8, i16, i32, i64).
5154 if (!ParamTy->isIntegerTy())
5155 return false;
5156 NewArgs.push_back(ConstantInt::get(ParamTy, Defaults[Idx - FirstDefault]));
5157 }
5158
5159 // Preserve operand bundles by creating the call with them.
5161 CI->getOperandBundlesAsDefs(OpBundles);
5162 CallInst *NewCall = Builder.CreateCall(NewFn, NewArgs, OpBundles);
5163
 // Carry over the name, calling convention, metadata and (for plain calls)
 // the tail-call kind of the old call.
 // NOTE(review): call-site attributes are not copied in the visible code --
 // confirm that this is intended.
5164 NewCall->takeName(CI);
5165 NewCall->setCallingConv(CI->getCallingConv());
5166 NewCall->copyMetadata(*CI);
5167 if (auto *OldCI = dyn_cast<CallInst>(CI))
5168 NewCall->setTailCallKind(OldCI->getTailCallKind());
5169
 // Redirect all users to the new call, then drop the old one.
5170 CI->replaceAllUsesWith(NewCall);
5171 CI->eraseFromParent();
5172 return true;
5173}
5174
5175/// Upgrade a call to an old intrinsic. All argument and return casting must be
5176/// provided to seamlessly integrate with existing context.
5178 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
5179 // checks the callee's function type matches. It's likely we need to handle
5180 // type changes here.
5182 if (!F)
5183 return;
5184
5185 LLVMContext &C = CI->getContext();
5186 IRBuilder<> Builder(C);
5187 if (isa<FPMathOperator>(CI))
5188 Builder.setFastMathFlags(CI->getFastMathFlags());
5189 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
5190
5191 if (!NewFn) {
5192 // Get the Function's name.
5193 StringRef Name = F->getName();
5194 if (!Name.consume_front("llvm."))
5195 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
5196
5197 bool IsX86 = Name.consume_front("x86.");
5198 bool IsNVVM = Name.consume_front("nvvm.");
5199 bool IsAArch64 = Name.consume_front("aarch64.");
5200 bool IsARM = Name.consume_front("arm.");
5201 bool IsAMDGCN = Name.consume_front("amdgcn.");
5202 bool IsDbg = Name.consume_front("dbg.");
5203 bool IsOldSplice =
5204 (Name.consume_front("experimental.vector.splice") ||
5205 Name.consume_front("vector.splice")) &&
5206 !(Name.starts_with(".left") || Name.starts_with(".right"));
5207 Value *Rep = nullptr;
5208
5209 if (!IsX86 && Name == "stackprotectorcheck") {
5210 Rep = nullptr;
5211 } else if (IsNVVM) {
5212 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
5213 } else if (IsX86) {
5214 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
5215 } else if (IsAArch64) {
5216 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
5217 } else if (IsARM) {
5218 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
5219 } else if (IsAMDGCN) {
5220 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
5221 } else if (IsDbg) {
5223 } else if (IsOldSplice) {
5224 Rep = upgradeVectorSplice(CI, Builder);
5225 } else if (Name.consume_front("convert.")) {
5226 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
5227 } else if (Name == "lifetime.start.i64" || Name == "lifetime.end.i64") {
5228 // Delete calls to invalid @llvm.lifetime.{start,end}.i64 intrinsics.
5229 Rep = nullptr;
5230 } else {
5231 llvm_unreachable("Unknown function for CallBase upgrade.");
5232 }
5233
5234 if (Rep)
5235 CI->replaceAllUsesWith(Rep);
5236 CI->eraseFromParent();
5237 return;
5238 }
5239
5240 const auto &DefaultCase = [&]() -> void {
5241 if (F == NewFn)
5242 return;
5243
5244 if (CI->getFunctionType() == NewFn->getFunctionType()) {
5245 // Handle generic mangling change.
5246 assert(
5247 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
5248 "Unknown function for CallBase upgrade and isn't just a name change");
5249 CI->setCalledFunction(NewFn);
5250 return;
5251 }
5252
5253 // This must be an upgrade from a named to a literal struct.
5254 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
5255 assert(OldST != NewFn->getReturnType() &&
5256 "Return type must have changed");
5257 assert(OldST->getNumElements() ==
5258 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
5259 "Must have same number of elements");
5260
5261 SmallVector<Value *> Args(CI->args());
5262 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5263 NewCI->setAttributes(CI->getAttributes());
5264 Value *Res = PoisonValue::get(OldST);
5265 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5266 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5267 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5268 }
5269 CI->replaceAllUsesWith(Res);
5270 CI->eraseFromParent();
5271 return;
5272 }
5273
5274 // We're probably about to produce something invalid. Let the verifier catch
5275 // it instead of dying here.
5276 CI->setCalledOperand(
5278 return;
5279 };
5280 CallInst *NewCall = nullptr;
5281 switch (NewFn->getIntrinsicID()) {
5282 default: {
5283 // Last resort: try the data-driven default-arg upgrade.
5284 // Handles any intrinsic annotated with ImmArg<..., DefaultValue<...>>
5285 // in its .td definition, without needing a dedicated case.
5286 if (upgradeIntrinsicCallWithDefaultArgs(CI, NewFn, Builder))
5287 return;
5288 DefaultCase();
5289 return;
5290 }
5291 case Intrinsic::arm_neon_vst1:
5292 case Intrinsic::arm_neon_vst2:
5293 case Intrinsic::arm_neon_vst3:
5294 case Intrinsic::arm_neon_vst4:
5295 case Intrinsic::arm_neon_vst2lane:
5296 case Intrinsic::arm_neon_vst3lane:
5297 case Intrinsic::arm_neon_vst4lane: {
5298 SmallVector<Value *, 4> Args(CI->args());
5299 NewCall = Builder.CreateCall(NewFn, Args);
5300 break;
5301 }
5302 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5303 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5304 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5305 LLVMContext &Ctx = F->getParent()->getContext();
5306 SmallVector<Value *, 4> Args(CI->args());
5307 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5308 cast<ConstantInt>(Args[3])->getZExtValue());
5309 NewCall = Builder.CreateCall(NewFn, Args);
5310 break;
5311 }
5312 case Intrinsic::aarch64_sve_ld3_sret:
5313 case Intrinsic::aarch64_sve_ld4_sret:
5314 case Intrinsic::aarch64_sve_ld2_sret: {
5315 // Is this a trivial remangle of the name to support ptr address spaces?
5316 if (isa<StructType>(F->getReturnType())) {
5317 DefaultCase();
5318 return;
5319 }
5320
5321 StringRef Name = F->getName();
5322 Name = Name.substr(5);
5323 unsigned N = StringSwitch<unsigned>(Name)
5324 .StartsWith("aarch64.sve.ld2", 2)
5325 .StartsWith("aarch64.sve.ld3", 3)
5326 .StartsWith("aarch64.sve.ld4", 4)
5327 .Default(0);
5328 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5329 unsigned MinElts = RetTy->getMinNumElements() / N;
5330 SmallVector<Value *, 2> Args(CI->args());
5331 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5332 Value *Ret = llvm::PoisonValue::get(RetTy);
5333 for (unsigned I = 0; I < N; I++) {
5334 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5335 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5336 }
5337 NewCall = dyn_cast<CallInst>(Ret);
5338 break;
5339 }
5340
5341 case Intrinsic::coro_end: {
5342 SmallVector<Value *, 3> Args(CI->args());
5343 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5344 NewCall = Builder.CreateCall(NewFn, Args);
5345 break;
5346 }
5347
5348 case Intrinsic::vector_extract: {
5349 StringRef Name = F->getName();
5350 Name = Name.substr(5); // Strip llvm
5351 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5352 DefaultCase();
5353 return;
5354 }
5355 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5356 unsigned MinElts = RetTy->getMinNumElements();
5357 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5358 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5359 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5360 break;
5361 }
5362
5363 case Intrinsic::vector_insert: {
5364 StringRef Name = F->getName();
5365 Name = Name.substr(5);
5366 if (!Name.starts_with("aarch64.sve.tuple")) {
5367 DefaultCase();
5368 return;
5369 }
5370 if (Name.starts_with("aarch64.sve.tuple.set")) {
5371 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5372 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5373 Value *NewIdx =
5374 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5375 NewCall = Builder.CreateCall(
5376 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5377 break;
5378 }
5379 if (Name.starts_with("aarch64.sve.tuple.create")) {
5380 unsigned N = StringSwitch<unsigned>(Name)
5381 .StartsWith("aarch64.sve.tuple.create2", 2)
5382 .StartsWith("aarch64.sve.tuple.create3", 3)
5383 .StartsWith("aarch64.sve.tuple.create4", 4)
5384 .Default(0);
5385 assert(N > 1 && "Create is expected to be between 2-4");
5386 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5387 Value *Ret = llvm::PoisonValue::get(RetTy);
5388 unsigned MinElts = RetTy->getMinNumElements() / N;
5389 for (unsigned I = 0; I < N; I++) {
5390 Value *V = CI->getArgOperand(I);
5391 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5392 }
5393 NewCall = dyn_cast<CallInst>(Ret);
5394 }
5395 break;
5396 }
5397
5398 case Intrinsic::arm_neon_bfdot:
5399 case Intrinsic::arm_neon_bfmmla:
5400 case Intrinsic::arm_neon_bfmlalb:
5401 case Intrinsic::arm_neon_bfmlalt:
5402 case Intrinsic::aarch64_neon_bfdot:
5403 case Intrinsic::aarch64_neon_bfmmla:
5404 case Intrinsic::aarch64_neon_bfmlalb:
5405 case Intrinsic::aarch64_neon_bfmlalt: {
5407 assert(CI->arg_size() == 3 &&
5408 "Mismatch between function args and call args");
5409 size_t OperandWidth =
5411 assert((OperandWidth == 64 || OperandWidth == 128) &&
5412 "Unexpected operand width");
5413 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5414 auto Iter = CI->args().begin();
5415 Args.push_back(*Iter++);
5416 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5417 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5418 NewCall = Builder.CreateCall(NewFn, Args);
5419 break;
5420 }
5421
5422 case Intrinsic::bitreverse:
5423 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5424 break;
5425
5426 case Intrinsic::ctlz:
5427 case Intrinsic::cttz: {
5428 if (CI->arg_size() != 1) {
5429 DefaultCase();
5430 return;
5431 }
5432
5433 NewCall =
5434 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5435 break;
5436 }
5437
5438 case Intrinsic::objectsize: {
5439 Value *NullIsUnknownSize =
5440 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5441 Value *Dynamic =
5442 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5443 NewCall = Builder.CreateCall(
5444 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5445 break;
5446 }
5447
5448 case Intrinsic::ctpop:
5449 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5450 break;
5451 case Intrinsic::dbg_value: {
5452 StringRef Name = F->getName();
5453 Name = Name.substr(5); // Strip llvm.
5454 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5455 if (Name.starts_with("dbg.addr")) {
5457 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5458 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5459 NewCall =
5460 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5461 MetadataAsValue::get(C, Expr)});
5462 break;
5463 }
5464
5465 // Upgrade from the old version that had an extra offset argument.
5466 assert(CI->arg_size() == 4);
5467 // Drop nonzero offsets instead of attempting to upgrade them.
5469 if (Offset->isNullValue()) {
5470 NewCall = Builder.CreateCall(
5471 NewFn,
5472 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5473 break;
5474 }
5475 CI->eraseFromParent();
5476 return;
5477 }
5478
5479 case Intrinsic::ptr_annotation:
5480 // Upgrade from versions that lacked the annotation attribute argument.
5481 if (CI->arg_size() != 4) {
5482 DefaultCase();
5483 return;
5484 }
5485
5486 // Create a new call with an added null annotation attribute argument.
5487 NewCall = Builder.CreateCall(
5488 NewFn,
5489 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5490 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5491 NewCall->takeName(CI);
5492 CI->replaceAllUsesWith(NewCall);
5493 CI->eraseFromParent();
5494 return;
5495
5496 case Intrinsic::var_annotation:
5497 // Upgrade from versions that lacked the annotation attribute argument.
5498 if (CI->arg_size() != 4) {
5499 DefaultCase();
5500 return;
5501 }
5502 // Create a new call with an added null annotation attribute argument.
5503 NewCall = Builder.CreateCall(
5504 NewFn,
5505 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5506 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5507 NewCall->takeName(CI);
5508 CI->replaceAllUsesWith(NewCall);
5509 CI->eraseFromParent();
5510 return;
5511
5512 case Intrinsic::riscv_aes32dsi:
5513 case Intrinsic::riscv_aes32dsmi:
5514 case Intrinsic::riscv_aes32esi:
5515 case Intrinsic::riscv_aes32esmi:
5516 case Intrinsic::riscv_sm4ks:
5517 case Intrinsic::riscv_sm4ed: {
5518 // The last argument to these intrinsics used to be i8 and changed to i32.
5519 // The type overload for sm4ks and sm4ed was removed.
5520 Value *Arg2 = CI->getArgOperand(2);
5521 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5522 return;
5523
5524 Value *Arg0 = CI->getArgOperand(0);
5525 Value *Arg1 = CI->getArgOperand(1);
5526 if (CI->getType()->isIntegerTy(64)) {
5527 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5528 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5529 }
5530
5531 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5532 cast<ConstantInt>(Arg2)->getZExtValue());
5533
5534 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5535 Value *Res = NewCall;
5536 if (Res->getType() != CI->getType())
5537 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5538 NewCall->takeName(CI);
5539 CI->replaceAllUsesWith(Res);
5540 CI->eraseFromParent();
5541 return;
5542 }
5543 case Intrinsic::nvvm_mapa_shared_cluster: {
5544 // Create a new call with the correct address space.
5545 NewCall =
5546 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5547 Value *Res = NewCall;
5548 Res = Builder.CreateAddrSpaceCast(
5549 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5550 NewCall->takeName(CI);
5551 CI->replaceAllUsesWith(Res);
5552 CI->eraseFromParent();
5553 return;
5554 }
5555 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5556 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5557 // Create a new call with the correct address space.
5558 SmallVector<Value *, 4> Args(CI->args());
5559 Args[0] = Builder.CreateAddrSpaceCast(
5560 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5561
5562 NewCall = Builder.CreateCall(NewFn, Args);
5563 NewCall->takeName(CI);
5564 CI->replaceAllUsesWith(NewCall);
5565 CI->eraseFromParent();
5566 return;
5567 }
5568 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5569 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5570 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5571 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5572 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5573 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5574 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5575 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5576 SmallVector<Value *, 16> Args(CI->args());
5577
5578 // Create AddrSpaceCast to shared_cluster if needed.
5579 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5580 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5582 Args[0] = Builder.CreateAddrSpaceCast(
5583 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5584
5585 // Attach the flag argument for cta_group, with a
5586 // default value of 0. This handles case (2) in
5587 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5588 size_t NumArgs = CI->arg_size();
5589 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5590 if (!FlagArg->getType()->isIntegerTy(1))
5591 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5592
5593 NewCall = Builder.CreateCall(NewFn, Args);
5594 NewCall->takeName(CI);
5595 CI->replaceAllUsesWith(NewCall);
5596 CI->eraseFromParent();
5597 return;
5598 }
5599 case Intrinsic::riscv_sha256sig0:
5600 case Intrinsic::riscv_sha256sig1:
5601 case Intrinsic::riscv_sha256sum0:
5602 case Intrinsic::riscv_sha256sum1:
5603 case Intrinsic::riscv_sm3p0:
5604 case Intrinsic::riscv_sm3p1: {
5605 // The last argument to these intrinsics used to be i8 and changed to i32.
5606 // The type overload for sm4ks and sm4ed was removed.
5607 if (!CI->getType()->isIntegerTy(64))
5608 return;
5609
5610 Value *Arg =
5611 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5612
5613 NewCall = Builder.CreateCall(NewFn, Arg);
5614 Value *Res =
5615 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5616 NewCall->takeName(CI);
5617 CI->replaceAllUsesWith(Res);
5618 CI->eraseFromParent();
5619 return;
5620 }
5621
5622 case Intrinsic::x86_xop_vfrcz_ss:
5623 case Intrinsic::x86_xop_vfrcz_sd:
5624 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5625 break;
5626
5627 case Intrinsic::x86_xop_vpermil2pd:
5628 case Intrinsic::x86_xop_vpermil2ps:
5629 case Intrinsic::x86_xop_vpermil2pd_256:
5630 case Intrinsic::x86_xop_vpermil2ps_256: {
5631 SmallVector<Value *, 4> Args(CI->args());
5632 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5633 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5634 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5635 NewCall = Builder.CreateCall(NewFn, Args);
5636 break;
5637 }
5638
5639 case Intrinsic::x86_sse41_ptestc:
5640 case Intrinsic::x86_sse41_ptestz:
5641 case Intrinsic::x86_sse41_ptestnzc: {
5642 // The arguments for these intrinsics used to be v4f32, and changed
5643 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5644 // So, the only thing required is a bitcast for both arguments.
5645 // First, check the arguments have the old type.
5646 Value *Arg0 = CI->getArgOperand(0);
5647 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5648 return;
5649
5650 // Old intrinsic, add bitcasts
5651 Value *Arg1 = CI->getArgOperand(1);
5652
5653 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5654
5655 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5656 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5657
5658 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5659 break;
5660 }
5661
5662 case Intrinsic::x86_rdtscp: {
5663 // This used to take 1 arguments. If we have no arguments, it is already
5664 // upgraded.
5665 if (CI->getNumOperands() == 0)
5666 return;
5667
5668 NewCall = Builder.CreateCall(NewFn);
5669 // Extract the second result and store it.
5670 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5671 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5672 // Replace the original call result with the first result of the new call.
5673 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5674
5675 NewCall->takeName(CI);
5676 CI->replaceAllUsesWith(TSC);
5677 CI->eraseFromParent();
5678 return;
5679 }
5680
5681 case Intrinsic::x86_sse41_insertps:
5682 case Intrinsic::x86_sse41_dppd:
5683 case Intrinsic::x86_sse41_dpps:
5684 case Intrinsic::x86_sse41_mpsadbw:
5685 case Intrinsic::x86_avx_dp_ps_256:
5686 case Intrinsic::x86_avx2_mpsadbw: {
5687 // Need to truncate the last argument from i32 to i8 -- this argument models
5688 // an inherently 8-bit immediate operand to these x86 instructions.
5689 SmallVector<Value *, 4> Args(CI->args());
5690
5691 // Replace the last argument with a trunc.
5692 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5693 NewCall = Builder.CreateCall(NewFn, Args);
5694 break;
5695 }
5696
5697 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5698 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5699 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5700 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5701 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5702 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5703 SmallVector<Value *, 4> Args(CI->args());
5704 unsigned NumElts =
5705 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5706 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5707
5708 NewCall = Builder.CreateCall(NewFn, Args);
5709 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5710
5711 NewCall->takeName(CI);
5712 CI->replaceAllUsesWith(Res);
5713 CI->eraseFromParent();
5714 return;
5715 }
5716
5717 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5718 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5719 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5720 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5721 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5722 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5723 SmallVector<Value *, 4> Args(CI->args());
5724 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5725 if (NewFn->getIntrinsicID() ==
5726 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5727 Args[1] = Builder.CreateBitCast(
5728 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5729
5730 NewCall = Builder.CreateCall(NewFn, Args);
5731 Value *Res = Builder.CreateBitCast(
5732 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5733
5734 NewCall->takeName(CI);
5735 CI->replaceAllUsesWith(Res);
5736 CI->eraseFromParent();
5737 return;
5738 }
5739 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5740 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5741 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5742 SmallVector<Value *, 4> Args(CI->args());
5743 unsigned NumElts =
5744 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5745 Args[1] = Builder.CreateBitCast(
5746 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5747 Args[2] = Builder.CreateBitCast(
5748 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5749
5750 NewCall = Builder.CreateCall(NewFn, Args);
5751 break;
5752 }
5753
5754 case Intrinsic::thread_pointer: {
5755 NewCall = Builder.CreateCall(NewFn, {});
5756 break;
5757 }
5758
5759 case Intrinsic::memcpy:
5760 case Intrinsic::memmove:
5761 case Intrinsic::memset: {
5762 // We have to make sure that the call signature is what we're expecting.
5763 // We only want to change the old signatures by removing the alignment arg:
5764 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5765 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5766 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5767 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5768 // Note: i8*'s in the above can be any pointer type
5769 if (CI->arg_size() != 5) {
5770 DefaultCase();
5771 return;
5772 }
5773 // Remove alignment argument (3), and add alignment attributes to the
5774 // dest/src pointers.
5775 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5776 CI->getArgOperand(2), CI->getArgOperand(4)};
5777 NewCall = Builder.CreateCall(NewFn, Args);
5778 AttributeList OldAttrs = CI->getAttributes();
5779 AttributeList NewAttrs = AttributeList::get(
5780 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5781 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5782 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5783 NewCall->setAttributes(NewAttrs);
5784 auto *MemCI = cast<MemIntrinsic>(NewCall);
5785 // All mem intrinsics support dest alignment.
5787 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5788 // Memcpy/Memmove also support source alignment.
5789 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5790 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5791 break;
5792 }
5793
5794 case Intrinsic::masked_load:
5795 case Intrinsic::masked_gather:
5796 case Intrinsic::masked_store:
5797 case Intrinsic::masked_scatter: {
5798 if (CI->arg_size() != 4) {
5799 DefaultCase();
5800 return;
5801 }
5802
5803 auto GetMaybeAlign = [](Value *Op) {
5804 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5805 uint64_t Val = CI->getZExtValue();
5806 if (Val == 0)
5807 return MaybeAlign();
5808 if (isPowerOf2_64(Val))
5809 return MaybeAlign(Val);
5810 }
5811 reportFatalUsageError("Invalid alignment argument");
5812 };
5813 auto GetAlign = [&](Value *Op) {
5814 MaybeAlign Align = GetMaybeAlign(Op);
5815 if (Align)
5816 return *Align;
5817 reportFatalUsageError("Invalid zero alignment argument");
5818 };
5819
5820 const DataLayout &DL = CI->getDataLayout();
5821 switch (NewFn->getIntrinsicID()) {
5822 case Intrinsic::masked_load:
5823 NewCall = Builder.CreateMaskedLoad(
5824 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5825 CI->getArgOperand(2), CI->getArgOperand(3));
5826 break;
5827 case Intrinsic::masked_gather:
5828 NewCall = Builder.CreateMaskedGather(
5829 CI->getType(), CI->getArgOperand(0),
5830 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5831 CI->getType()->getScalarType()),
5832 CI->getArgOperand(2), CI->getArgOperand(3));
5833 break;
5834 case Intrinsic::masked_store:
5835 NewCall = Builder.CreateMaskedStore(
5836 CI->getArgOperand(0), CI->getArgOperand(1),
5837 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5838 break;
5839 case Intrinsic::masked_scatter:
5840 NewCall = Builder.CreateMaskedScatter(
5841 CI->getArgOperand(0), CI->getArgOperand(1),
5842 DL.getValueOrABITypeAlignment(
5843 GetMaybeAlign(CI->getArgOperand(2)),
5844 CI->getArgOperand(0)->getType()->getScalarType()),
5845 CI->getArgOperand(3));
5846 break;
5847 default:
5848 llvm_unreachable("Unexpected intrinsic ID");
5849 }
5850 // Previous metadata is still valid.
5851 NewCall->copyMetadata(*CI);
5852 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5853 break;
5854 }
5855
5856 case Intrinsic::lifetime_start:
5857 case Intrinsic::lifetime_end: {
5858 if (CI->arg_size() != 2) {
5859 DefaultCase();
5860 return;
5861 }
5862
5863 Value *Ptr = CI->getArgOperand(1);
5864 // Try to strip pointer casts, such that the lifetime works on an alloca.
5865 Ptr = Ptr->stripPointerCasts();
5866 if (isa<AllocaInst>(Ptr)) {
5867 // Don't use NewFn, as we might have looked through an addrspacecast.
5868 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5869 NewCall = Builder.CreateLifetimeStart(Ptr);
5870 else
5871 NewCall = Builder.CreateLifetimeEnd(Ptr);
5872 break;
5873 }
5874
5875 // Otherwise remove the lifetime marker.
5876 CI->eraseFromParent();
5877 return;
5878 }
5879
5880 case Intrinsic::x86_avx512_vpdpbusd_128:
5881 case Intrinsic::x86_avx512_vpdpbusd_256:
5882 case Intrinsic::x86_avx512_vpdpbusd_512:
5883 case Intrinsic::x86_avx512_vpdpbusds_128:
5884 case Intrinsic::x86_avx512_vpdpbusds_256:
5885 case Intrinsic::x86_avx512_vpdpbusds_512:
5886 case Intrinsic::x86_avx2_vpdpbssd_128:
5887 case Intrinsic::x86_avx2_vpdpbssd_256:
5888 case Intrinsic::x86_avx10_vpdpbssd_512:
5889 case Intrinsic::x86_avx2_vpdpbssds_128:
5890 case Intrinsic::x86_avx2_vpdpbssds_256:
5891 case Intrinsic::x86_avx10_vpdpbssds_512:
5892 case Intrinsic::x86_avx2_vpdpbsud_128:
5893 case Intrinsic::x86_avx2_vpdpbsud_256:
5894 case Intrinsic::x86_avx10_vpdpbsud_512:
5895 case Intrinsic::x86_avx2_vpdpbsuds_128:
5896 case Intrinsic::x86_avx2_vpdpbsuds_256:
5897 case Intrinsic::x86_avx10_vpdpbsuds_512:
5898 case Intrinsic::x86_avx2_vpdpbuud_128:
5899 case Intrinsic::x86_avx2_vpdpbuud_256:
5900 case Intrinsic::x86_avx10_vpdpbuud_512:
5901 case Intrinsic::x86_avx2_vpdpbuuds_128:
5902 case Intrinsic::x86_avx2_vpdpbuuds_256:
5903 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5904 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5905 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5906 CI->getArgOperand(2)};
5907 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5908 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5909 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5910
5911 NewCall = Builder.CreateCall(NewFn, Args);
5912 break;
5913 }
5914 case Intrinsic::x86_avx512_vpdpwssd_128:
5915 case Intrinsic::x86_avx512_vpdpwssd_256:
5916 case Intrinsic::x86_avx512_vpdpwssd_512:
5917 case Intrinsic::x86_avx512_vpdpwssds_128:
5918 case Intrinsic::x86_avx512_vpdpwssds_256:
5919 case Intrinsic::x86_avx512_vpdpwssds_512:
5920 case Intrinsic::x86_avx2_vpdpwsud_128:
5921 case Intrinsic::x86_avx2_vpdpwsud_256:
5922 case Intrinsic::x86_avx10_vpdpwsud_512:
5923 case Intrinsic::x86_avx2_vpdpwsuds_128:
5924 case Intrinsic::x86_avx2_vpdpwsuds_256:
5925 case Intrinsic::x86_avx10_vpdpwsuds_512:
5926 case Intrinsic::x86_avx2_vpdpwusd_128:
5927 case Intrinsic::x86_avx2_vpdpwusd_256:
5928 case Intrinsic::x86_avx10_vpdpwusd_512:
5929 case Intrinsic::x86_avx2_vpdpwusds_128:
5930 case Intrinsic::x86_avx2_vpdpwusds_256:
5931 case Intrinsic::x86_avx10_vpdpwusds_512:
5932 case Intrinsic::x86_avx2_vpdpwuud_128:
5933 case Intrinsic::x86_avx2_vpdpwuud_256:
5934 case Intrinsic::x86_avx10_vpdpwuud_512:
5935 case Intrinsic::x86_avx2_vpdpwuuds_128:
5936 case Intrinsic::x86_avx2_vpdpwuuds_256:
5937 case Intrinsic::x86_avx10_vpdpwuuds_512:
5938 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5939 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5940 CI->getArgOperand(2)};
5941 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5942 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5943 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5944
5945 NewCall = Builder.CreateCall(NewFn, Args);
5946 break;
5947 }
5948 assert(NewCall && "Should have either set this variable or returned through "
5949 "the default case");
5950 NewCall->takeName(CI);
5951 CI->replaceAllUsesWith(NewCall);
5952 CI->eraseFromParent();
5953}
5954
5956 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5957
5958 // Check if this function should be upgraded and get the replacement function
5959 // if there is one.
5960 Function *NewFn;
5961 if (UpgradeIntrinsicFunction(F, NewFn)) {
5962 // Replace all users of the old function with the new function or new
5963 // instructions. Use an early-increment range because each call is deleted.
5964 for (User *U : make_early_inc_range(F->users()))
5965 if (CallBase *CB = dyn_cast<CallBase>(U))
5966 UpgradeIntrinsicCall(CB, NewFn);
5967
5968 // Remove old function, no longer used, from the module.
5969 if (F != NewFn)
5970 F->eraseFromParent();
5971 }
5972}
5973
5975 const unsigned NumOperands = MD.getNumOperands();
5976 if (NumOperands == 0)
5977 return &MD; // Invalid, punt to a verifier error.
5978
5979 // Check if the tag uses struct-path aware TBAA format.
5980 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5981 return &MD;
5982
5983 auto &Context = MD.getContext();
5984 if (NumOperands == 3) {
5985 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5986 MDNode *ScalarType = MDNode::get(Context, Elts);
5987 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5988 Metadata *Elts2[] = {ScalarType, ScalarType,
5991 MD.getOperand(2)};
5992 return MDNode::get(Context, Elts2);
5993 }
5994 // Create a MDNode <MD, MD, offset 0>
5996 Type::getInt64Ty(Context)))};
5997 return MDNode::get(Context, Elts);
5998}
5999
6001 Instruction *&Temp) {
6002 if (Opc != Instruction::BitCast)
6003 return nullptr;
6004
6005 Temp = nullptr;
6006 Type *SrcTy = V->getType();
6007 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
6008 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
6009 LLVMContext &Context = V->getContext();
6010
6011 // We have no information about target data layout, so we assume that
6012 // the maximum pointer size is 64bit.
6013 Type *MidTy = Type::getInt64Ty(Context);
6014 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
6015
6016 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
6017 }
6018
6019 return nullptr;
6020}
6021
6023 if (Opc != Instruction::BitCast)
6024 return nullptr;
6025
6026 Type *SrcTy = C->getType();
6027 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
6028 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
6029 LLVMContext &Context = C->getContext();
6030
6031 // We have no information about target data layout, so we assume that
6032 // the maximum pointer size is 64bit.
6033 Type *MidTy = Type::getInt64Ty(Context);
6034
6036 DestTy);
6037 }
6038
6039 return nullptr;
6040}
6041
6042/// Check the debug info version number, if it is out-dated, drop the debug
6043/// info. Return true if module is modified.
6046 return false;
6047
6048 llvm::TimeTraceScope timeScope("Upgrade debug info");
6049 // We need to get metadata before the module is verified (i.e., getModuleFlag
6050 // makes assumptions that we haven't verified yet). Carefully extract the flag
6051 // from the metadata.
6052 unsigned Version = 0;
6053 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
6054 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
6055 if (Flag->getNumOperands() < 3)
6056 return false;
6057 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
6058 return K->getString() == "Debug Info Version";
6059 return false;
6060 });
6061 if (OpIt != ModFlags->op_end()) {
6062 const MDOperand &ValOp = (*OpIt)->getOperand(2);
6063 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
6064 Version = CI->getZExtValue();
6065 }
6066 }
6067
6069 bool BrokenDebugInfo = false;
6070 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
6071 report_fatal_error("Broken module found, compilation aborted!");
6072 if (!BrokenDebugInfo)
6073 // Everything is ok.
6074 return false;
6075 else {
6076 // Diagnose malformed debug info.
6078 M.getContext().diagnose(Diag);
6079 }
6080 }
6081 bool Modified = StripDebugInfo(M);
6083 // Diagnose a version mismatch.
6085 M.getContext().diagnose(DiagVersion);
6086 }
6087 return Modified;
6088}
6089
6090static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
6091 GlobalValue *GV, const Metadata *V) {
6092 Function *F = cast<Function>(GV);
6093
6094 constexpr StringLiteral DefaultValue = "1";
6095 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
6096 unsigned Length = 0;
6097
6098 if (F->hasFnAttribute(Attr)) {
6099 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
6100 // parse these elements placing them into Vect3
6101 StringRef S = F->getFnAttribute(Attr).getValueAsString();
6102 for (; Length < 3 && !S.empty(); Length++) {
6103 auto [Part, Rest] = S.split(',');
6104 Vect3[Length] = Part.trim();
6105 S = Rest;
6106 }
6107 }
6108
6109 const unsigned Dim = DimC - 'x';
6110 assert(Dim < 3 && "Unexpected dim char");
6111
6112 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
6113
6114 // local variable required for StringRef in Vect3 to point to.
6115 const std::string VStr = llvm::utostr(VInt);
6116 Vect3[Dim] = VStr;
6117 Length = std::max(Length, Dim + 1);
6118
6119 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
6120 F->addFnAttr(Attr, NewAttr);
6121}
6122
6123static inline bool isXYZ(StringRef S) {
6124 return S == "x" || S == "y" || S == "z";
6125}
6126
6128 const Metadata *V) {
6129 if (K == "kernel") {
6131 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
6132 return true;
6133 }
6134 if (K == "align") {
6135 // V is a bitfield specifying two 16-bit values. The alignment value is
6136 // specified in the low 16 bits; the index is specified in the high bits. For
6137 // the index, 0 indicates the return value while higher values correspond to
6138 // each parameter (idx = param + 1).
6139 const uint64_t AlignIdxValuePair =
6140 mdconst::extract<ConstantInt>(V)->getZExtValue();
6141 const unsigned Idx = (AlignIdxValuePair >> 16);
6142 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
6143 cast<Function>(GV)->addAttributeAtIndex(
6144 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
6145 return true;
6146 }
6147 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
6148 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
6150 return true;
6151 }
6152 if (K == "minctasm") {
6153 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
6154 cast<Function>(GV)->addFnAttr(NVVMAttr::MinCTASm, llvm::utostr(CV));
6155 return true;
6156 }
6157 if (K == "maxnreg") {
6158 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
6159 cast<Function>(GV)->addFnAttr(NVVMAttr::MaxNReg, llvm::utostr(CV));
6160 return true;
6161 }
6162 if (K.consume_front("maxntid") && isXYZ(K)) {
6164 return true;
6165 }
6166 if (K.consume_front("reqntid") && isXYZ(K)) {
6168 return true;
6169 }
6170 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
6172 return true;
6173 }
6174 if (K == "grid_constant") {
6175 const auto Attr = Attribute::get(GV->getContext(), NVVMAttr::GridConstant);
6176 for (const auto &Op : cast<MDNode>(V)->operands()) {
6177 // For some reason, the index is 1-based in the metadata. Good thing we're
6178 // able to auto-upgrade it!
6179 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
6180 cast<Function>(GV)->addParamAttr(Index, Attr);
6181 }
6182 return true;
6183 }
6184
6185 return false;
6186}
6187
6189 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
6190 if (!NamedMD)
6191 return;
6192
6193 SmallVector<MDNode *, 8> NewNodes;
6195 for (MDNode *MD : NamedMD->operands()) {
6196 if (!SeenNodes.insert(MD).second)
6197 continue;
6198
6199 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
6200 if (!GV)
6201 continue;
6202
6203 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
6204
6205 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
6206 // Each nvvm.annotations metadata entry will be of the following form:
6207 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
6208 // start index = 1, to skip the global variable key
6209 // increment = 2, to skip the value for each property-value pairs
6210 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6211 MDString *K = cast<MDString>(MD->getOperand(j));
6212 const MDOperand &V = MD->getOperand(j + 1);
6213 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
6214 if (!Upgraded)
6215 NewOperands.append({K, V});
6216 }
6217
6218 if (NewOperands.size() > 1)
6219 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
6220 }
6221
6222 NamedMD->clearOperands();
6223 for (MDNode *N : NewNodes)
6224 NamedMD->addOperand(N);
6225}
6226
6227/// This checks for objc retain release marker which should be upgraded. It
6228/// returns true if module is modified.
6230 bool Changed = false;
6231 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
6232 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6233 if (ModRetainReleaseMarker) {
6234 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
6235 if (Op) {
6236 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
6237 if (ID) {
6238 SmallVector<StringRef, 4> ValueComp;
6239 ID->getString().split(ValueComp, "#");
6240 if (ValueComp.size() == 2) {
6241 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
6242 ID = MDString::get(M.getContext(), NewValue);
6243 }
6244 M.addModuleFlag(Module::Error, MarkerKey, ID);
6245 M.eraseNamedMetadata(ModRetainReleaseMarker);
6246 Changed = true;
6247 }
6248 }
6249 }
6250 return Changed;
6251}
6252
6254 // This lambda converts normal function calls to ARC runtime functions to
6255 // intrinsic calls.
6256 auto UpgradeToIntrinsic = [&](const char *OldFunc,
6257 llvm::Intrinsic::ID IntrinsicFunc) {
6258 Function *Fn = M.getFunction(OldFunc);
6259
6260 if (!Fn)
6261 return;
6262
6263 Function *NewFn =
6264 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
6265
6266 for (User *U : make_early_inc_range(Fn->users())) {
6268 if (!CI || CI->getCalledFunction() != Fn)
6269 continue;
6270
6271 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
6272 FunctionType *NewFuncTy = NewFn->getFunctionType();
6274
6275 // Don't upgrade the intrinsic if it's not valid to bitcast the return
6276 // value to the return type of the old function.
6277 if (NewFuncTy->getReturnType() != CI->getType() &&
6278 !CastInst::castIsValid(Instruction::BitCast, CI,
6279 NewFuncTy->getReturnType()))
6280 continue;
6281
6282 bool InvalidCast = false;
6283
6284 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
6285 Value *Arg = CI->getArgOperand(I);
6286
6287 // Bitcast argument to the parameter type of the new function if it's
6288 // not a variadic argument.
6289 if (I < NewFuncTy->getNumParams()) {
6290 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6291 // to the parameter type of the new function.
6292 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
6293 NewFuncTy->getParamType(I))) {
6294 InvalidCast = true;
6295 break;
6296 }
6297 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6298 }
6299 Args.push_back(Arg);
6300 }
6301
6302 if (InvalidCast)
6303 continue;
6304
6305 // Create a call instruction that calls the new function.
6306 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6307 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
6308 NewCall->takeName(CI);
6309
6310 // Bitcast the return value back to the type of the old call.
6311 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6312
6313 if (!CI->use_empty())
6314 CI->replaceAllUsesWith(NewRetVal);
6315 CI->eraseFromParent();
6316 }
6317
6318 if (Fn->use_empty())
6319 Fn->eraseFromParent();
6320 };
6321
6322 // Unconditionally convert a call to "clang.arc.use" to a call to
6323 // "llvm.objc.clang.arc.use".
6324 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6325
6326 // Upgrade the retain release marker. If there is no need to upgrade
6327 // the marker, that means either the module is already new enough to contain
6328 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
6330 return;
6331
6332 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6333 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6334 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6335 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6336 {"objc_autoreleaseReturnValue",
6337 llvm::Intrinsic::objc_autoreleaseReturnValue},
6338 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6339 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6340 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6341 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6342 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6343 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6344 {"objc_release", llvm::Intrinsic::objc_release},
6345 {"objc_retain", llvm::Intrinsic::objc_retain},
6346 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6347 {"objc_retainAutoreleaseReturnValue",
6348 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6349 {"objc_retainAutoreleasedReturnValue",
6350 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6351 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6352 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6353 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6354 {"objc_unsafeClaimAutoreleasedReturnValue",
6355 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6356 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6357 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6358 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6359 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6360 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6361 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6362 {"objc_arc_annotation_topdown_bbstart",
6363 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6364 {"objc_arc_annotation_topdown_bbend",
6365 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6366 {"objc_arc_annotation_bottomup_bbstart",
6367 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6368 {"objc_arc_annotation_bottomup_bbend",
6369 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6370
6371 for (auto &I : RuntimeFuncs)
6372 UpgradeToIntrinsic(I.first, I.second);
6373}
6374
6376 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6377 if (!ModFlags)
6378 return false;
6379
6380 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6381 bool HasSwiftVersionFlag = false;
6382 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6383 uint32_t SwiftABIVersion;
6384 auto Int8Ty = Type::getInt8Ty(M.getContext());
6385 auto Int32Ty = Type::getInt32Ty(M.getContext());
6386
6387 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6388 MDNode *Op = ModFlags->getOperand(I);
6389 if (Op->getNumOperands() != 3)
6390 continue;
6391 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6392 if (!ID)
6393 continue;
6394 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6395 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6396 Type::getInt32Ty(M.getContext()), B)),
6397 MDString::get(M.getContext(), ID->getString()),
6398 Op->getOperand(2)};
6399 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6400 Changed = true;
6401 };
6402
6403 if (ID->getString() == "Objective-C Image Info Version")
6404 HasObjCFlag = true;
6405 if (ID->getString() == "Objective-C Class Properties")
6406 HasClassProperties = true;
6407 // Upgrade PIC from Error/Max to Min.
6408 if (ID->getString() == "PIC Level") {
6409 if (auto *Behavior =
6411 uint64_t V = Behavior->getLimitedValue();
6412 if (V == Module::Error || V == Module::Max)
6413 SetBehavior(Module::Min);
6414 }
6415 }
6416 // Upgrade "PIE Level" from Error to Max.
6417 if (ID->getString() == "PIE Level")
6418 if (auto *Behavior =
6420 if (Behavior->getLimitedValue() == Module::Error)
6421 SetBehavior(Module::Max);
6422
6423 // Upgrade branch protection and return address signing module flags. The
6424 // module flag behavior for these fields was Error and is now Min.
6425 if (ID->getString() == "branch-target-enforcement" ||
6426 ID->getString().starts_with("sign-return-address")) {
6427 if (auto *Behavior =
6429 if (Behavior->getLimitedValue() == Module::Error) {
6430 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6431 Metadata *Ops[3] = {
6432 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6433 Op->getOperand(1), Op->getOperand(2)};
6434 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6435 Changed = true;
6436 }
6437 }
6438 }
6439
6440 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
6441 // section name so that llvm-lto will not complain about mismatching
6442 // module flags that are functionally the same.
6443 if (ID->getString() == "Objective-C Image Info Section") {
6444 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6445 SmallVector<StringRef, 4> ValueComp;
6446 Value->getString().split(ValueComp, " ");
6447 if (ValueComp.size() != 1) {
6448 std::string NewValue;
6449 for (auto &S : ValueComp)
6450 NewValue += S.str();
6451 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6452 MDString::get(M.getContext(), NewValue)};
6453 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6454 Changed = true;
6455 }
6456 }
6457 }
6458
6459 // IRUpgrader turns an i32 "Objective-C Garbage Collection" value into an i8
6460 // value. If the higher bits are set, it adds new module flags for Swift info.
6461 if (ID->getString() == "Objective-C Garbage Collection") {
6462 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6463 if (Md) {
6464 assert(Md->getValue() && "Expected non-empty metadata");
6465 auto Type = Md->getValue()->getType();
6466 if (Type == Int8Ty)
6467 continue;
6468 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6469 if ((Val & 0xff) != Val) {
6470 HasSwiftVersionFlag = true;
6471 SwiftABIVersion = (Val & 0xff00) >> 8;
6472 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6473 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6474 }
6475 Metadata *Ops[3] = {
6476 ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
6477 Op->getOperand(1),
6478 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
6479 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6480 Changed = true;
6481 }
6482 }
6483
6484 if (ID->getString() == "amdgpu_code_object_version") {
6485 Metadata *Ops[3] = {
6486 Op->getOperand(0),
6487 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6488 Op->getOperand(2)};
6489 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6490 Changed = true;
6491 }
6492 }
6493
6494 // "Objective-C Class Properties" is recently added for Objective-C. We
6495 // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
6496 // flag of value 0, so we can correctly downgrade this flag when trying to
6497 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6498 // this module flag.
6499 if (HasObjCFlag && !HasClassProperties) {
6500 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6501 (uint32_t)0);
6502 Changed = true;
6503 }
6504
6505 if (HasSwiftVersionFlag) {
6506 M.addModuleFlag(Module::Error, "Swift ABI Version",
6507 SwiftABIVersion);
6508 M.addModuleFlag(Module::Error, "Swift Major Version",
6509 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6510 M.addModuleFlag(Module::Error, "Swift Minor Version",
6511 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6512 Changed = true;
6513 }
6514
6515 return Changed;
6516}
6517
6519 NamedMDNode *CFIConsts = M.getNamedMetadata("cfi.functions");
6520 // If this metadata has operands, we expect all of them to be either from
6521 // before or from after the format change handled here, so we can bail out
6522 // fast if the first operand (if any) is of the new format.
6523 auto MatchesVersion = [](const MDNode *Op) {
6524 return Op->getNumOperands() >= 3 &&
6525 isa<ConstantAsMetadata>(Op->getOperand(2)) &&
6526 cast<ConstantAsMetadata>(Op->getOperand(2))
6527 ->getType()
6528 ->isIntegerTy(64);
6529 };
6530
6531 if (!CFIConsts || !CFIConsts->getNumOperands() ||
6532 MatchesVersion(CFIConsts->getOperand(0)))
6533 return false;
6534
6535 bool Changed = false;
6536 for (unsigned I = 0, E = CFIConsts->getNumOperands(); I != E; ++I) {
6537 MDNode *Op = CFIConsts->getOperand(I);
6538 assert(!MatchesVersion(Op) && "Unexpected mix of CFIConstant formats");
6539 assert(Op->getNumOperands() >= 2 &&
6540 "Expected at least 2 operands - name and linkage type");
6541 MDString *NameMD = dyn_cast<MDString>(Op->getOperand(0));
6542 StringRef Name = NameMD->getString();
6545
6547 Elts.push_back(Op->getOperand(0));
6548 Elts.push_back(Op->getOperand(1));
6550 ConstantInt::get(Type::getInt64Ty(M.getContext()), GUID)));
6551
6552 for (unsigned J = 2, EJ = Op->getNumOperands(); J != EJ; ++J)
6553 Elts.push_back(Op->getOperand(J));
6554
6555 CFIConsts->setOperand(I, MDNode::get(M.getContext(), Elts));
6556 Changed = true;
6557 }
6558
6559 return Changed;
6560}
6561
6563 auto TrimSpaces = [](StringRef Section) -> std::string {
6564 SmallVector<StringRef, 5> Components;
6565 Section.split(Components, ',');
6566
6567 SmallString<32> Buffer;
6568 raw_svector_ostream OS(Buffer);
6569
6570 for (auto Component : Components)
6571 OS << ',' << Component.trim();
6572
6573 return std::string(OS.str().substr(1));
6574 };
6575
6576 for (auto &GV : M.globals()) {
6577 if (!GV.hasSection())
6578 continue;
6579
6580 StringRef Section = GV.getSection();
6581
6582 if (!Section.starts_with("__DATA, __objc_catlist"))
6583 continue;
6584
6585 // __DATA, __objc_catlist, regular, no_dead_strip
6586 // __DATA,__objc_catlist,regular,no_dead_strip
6587 GV.setSection(TrimSpaces(Section));
6588 }
6589}
6590
6591namespace {
6592// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6593// callsites within a function that did not also have the strictfp attribute.
6594// Since 10.0, if strict FP semantics are needed within a function, the
6595// function must have the strictfp attribute and all calls within the function
6596// must also have the strictfp attribute. This latter restriction is
6597// necessary to prevent unwanted libcall simplification when a function is
6598// being cloned (such as for inlining).
6599//
6600// The "dangling" strictfp attribute usage was only used to prevent constant
6601// folding and other libcall simplification. The nobuiltin attribute on the
6602// callsite has the same effect.
6603struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6604 StrictFPUpgradeVisitor() = default;
6605
6606 void visitCallBase(CallBase &Call) {
6607 if (!Call.isStrictFP())
6608 return;
6610 return;
6611 // If we get here, the caller doesn't have the strictfp attribute
6612 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6613 Call.removeFnAttr(Attribute::StrictFP);
6614 Call.addFnAttr(Attribute::NoBuiltin);
6615 }
6616};
6617
6618/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6619struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6620 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6621 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6622
6623 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6624 if (!RMW.isFloatingPointOperation())
6625 return;
6626
6627 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6628 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6629 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6630 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6631 }
6632};
6633} // namespace
6634
6636 // If a function definition doesn't have the strictfp attribute,
6637 // convert any callsite strictfp attributes to nobuiltin.
6638 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6639 StrictFPUpgradeVisitor SFPV;
6640 SFPV.visit(F);
6641 }
6642
6643 // Remove all incompatible attributes from the function.
6644 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6645 F.getReturnType(), F.getAttributes().getRetAttrs()));
6646 for (auto &Arg : F.args())
6647 Arg.removeAttrs(
6648 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6649
6650 bool AddingAttrs = false, RemovingAttrs = false;
6651 AttrBuilder AttrsToAdd(F.getContext());
6652 AttributeMask AttrsToRemove;
6653
6654 // Older versions of LLVM treated an "implicit-section-name" attribute
6655 // similarly to directly setting the section on a Function.
6656 if (Attribute A = F.getFnAttribute("implicit-section-name");
6657 A.isValid() && A.isStringAttribute()) {
6658 F.setSection(A.getValueAsString());
6659 AttrsToRemove.addAttribute("implicit-section-name");
6660 RemovingAttrs = true;
6661 }
6662
6663 if (Attribute A = F.getFnAttribute("nooutline");
6664 A.isValid() && A.isStringAttribute()) {
6665 AttrsToRemove.addAttribute("nooutline");
6666 AttrsToAdd.addAttribute(Attribute::NoOutline);
6667 AddingAttrs = RemovingAttrs = true;
6668 }
6669
6670 if (Attribute A = F.getFnAttribute("uniform-work-group-size");
6671 A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
6672 AttrsToRemove.addAttribute("uniform-work-group-size");
6673 RemovingAttrs = true;
6674 if (A.getValueAsString() == "true") {
6675 AttrsToAdd.addAttribute("uniform-work-group-size");
6676 AddingAttrs = true;
6677 }
6678 }
6679
6680 if (!F.empty()) {
6681 // For some reason this is called twice, and the first time is before any
6682 // instructions are loaded into the body.
6683
6684 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6685 A.isValid()) {
6686
6687 if (A.getValueAsBool()) {
6688 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6689 Visitor.visit(F);
6690 }
6691
6692 // We will leave behind dead attribute uses on external declarations, but
6693 // clang never added these to declarations anyway.
6694 AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
6695 RemovingAttrs = true;
6696 }
6697 }
6698
6699 DenormalMode DenormalFPMath = DenormalMode::getIEEE();
6700 DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();
6701
6702 bool HandleDenormalMode = false;
6703
6704 if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
6705 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6706 if (ParsedMode.isValid()) {
6707 DenormalFPMath = ParsedMode;
6708 AttrsToRemove.addAttribute("denormal-fp-math");
6709 AddingAttrs = RemovingAttrs = true;
6710 HandleDenormalMode = true;
6711 }
6712 }
6713
6714 if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
6715 Attr.isValid()) {
6716 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6717 if (ParsedMode.isValid()) {
6718 DenormalFPMathF32 = ParsedMode;
6719 AttrsToRemove.addAttribute("denormal-fp-math-f32");
6720 AddingAttrs = RemovingAttrs = true;
6721 HandleDenormalMode = true;
6722 }
6723 }
6724
6725 if (HandleDenormalMode)
6726 AttrsToAdd.addDenormalFPEnvAttr(
6727 DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));
6728
6729 if (RemovingAttrs)
6730 F.removeFnAttrs(AttrsToRemove);
6731
6732 if (AddingAttrs)
6733 F.addFnAttrs(AttrsToAdd);
6734}
6735
6736// Check if the function attribute is not present and set it.
6738 StringRef Value) {
6739 if (!F.hasFnAttribute(FnAttrName))
6740 F.addFnAttr(FnAttrName, Value);
6741}
6742
6743// Check if the function attribute is not present and set it if needed.
6744// If the attribute is "false" then removes it.
6745// If the attribute is "true" resets it to a valueless attribute.
6746static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6747 if (!F.hasFnAttribute(FnAttrName)) {
6748 if (Set)
6749 F.addFnAttr(FnAttrName);
6750 } else {
6751 auto A = F.getFnAttribute(FnAttrName);
6752 if ("false" == A.getValueAsString())
6753 F.removeFnAttr(FnAttrName);
6754 else if ("true" == A.getValueAsString()) {
6755 F.removeFnAttr(FnAttrName);
6756 F.addFnAttr(FnAttrName);
6757 }
6758 }
6759}
6760
6762 Triple T(M.getTargetTriple());
6763 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6764 return;
6765
6766 uint64_t BTEValue = 0;
6767 uint64_t BPPLRValue = 0;
6768 uint64_t GCSValue = 0;
6769 uint64_t SRAValue = 0;
6770 uint64_t SRAALLValue = 0;
6771 uint64_t SRABKeyValue = 0;
6772
6773 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6774 if (ModFlags) {
6775 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6776 MDNode *Op = ModFlags->getOperand(I);
6777 if (Op->getNumOperands() != 3)
6778 continue;
6779
6780 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6781 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6782 if (!ID || !CI)
6783 continue;
6784
6785 StringRef IDStr = ID->getString();
6786 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6787 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6788 : IDStr == "guarded-control-stack" ? &GCSValue
6789 : IDStr == "sign-return-address" ? &SRAValue
6790 : IDStr == "sign-return-address-all" ? &SRAALLValue
6791 : IDStr == "sign-return-address-with-bkey"
6792 ? &SRABKeyValue
6793 : nullptr;
6794 if (!ValPtr)
6795 continue;
6796
6797 *ValPtr = CI->getZExtValue();
6798 if (*ValPtr == 2)
6799 return;
6800 }
6801 }
6802
6803 bool BTE = BTEValue == 1;
6804 bool BPPLR = BPPLRValue == 1;
6805 bool GCS = GCSValue == 1;
6806 bool SRA = SRAValue == 1;
6807
6808 StringRef SignTypeValue = "non-leaf";
6809 if (SRA && SRAALLValue == 1)
6810 SignTypeValue = "all";
6811
6812 StringRef SignKeyValue = "a_key";
6813 if (SRA && SRABKeyValue == 1)
6814 SignKeyValue = "b_key";
6815
6816 for (Function &F : M.getFunctionList()) {
6817 if (F.isDeclaration())
6818 continue;
6819
6820 if (SRA) {
6821 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6822 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6823 } else {
6824 if (auto A = F.getFnAttribute("sign-return-address");
6825 A.isValid() && "none" == A.getValueAsString()) {
6826 F.removeFnAttr("sign-return-address");
6827 F.removeFnAttr("sign-return-address-key");
6828 }
6829 }
6830 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6831 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6832 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6833 }
6834
6835 if (BTE)
6836 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6837 if (BPPLR)
6838 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6839 if (GCS)
6840 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6841 if (SRA) {
6842 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6843 if (SRAALLValue == 1)
6844 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6845 if (SRABKeyValue == 1)
6846 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6847 }
6848}
6849
6850static bool isOldLoopArgument(Metadata *MD) {
6851 auto *T = dyn_cast_or_null<MDTuple>(MD);
6852 if (!T)
6853 return false;
6854 if (T->getNumOperands() < 1)
6855 return false;
6856 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6857 if (!S)
6858 return false;
6859 return S->getString().starts_with("llvm.vectorizer.");
6860}
6861
6863 StringRef OldPrefix = "llvm.vectorizer.";
6864 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6865
6866 if (OldTag == "llvm.vectorizer.unroll")
6867 return MDString::get(C, "llvm.loop.interleave.count");
6868
6869 return MDString::get(
6870 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6871 .str());
6872}
6873
6875 auto *T = dyn_cast_or_null<MDTuple>(MD);
6876 if (!T)
6877 return MD;
6878 if (T->getNumOperands() < 1)
6879 return MD;
6880 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6881 if (!OldTag)
6882 return MD;
6883 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6884 return MD;
6885
6886 // This has an old tag. Upgrade it.
6888 Ops.reserve(T->getNumOperands());
6889 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6890 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6891 Ops.push_back(T->getOperand(I));
6892
6893 return MDTuple::get(T->getContext(), Ops);
6894}
6895
6897 auto *T = dyn_cast<MDTuple>(&N);
6898 if (!T)
6899 return &N;
6900
6901 if (none_of(T->operands(), isOldLoopArgument))
6902 return &N;
6903
6905 Ops.reserve(T->getNumOperands());
6906 for (Metadata *MD : T->operands())
6907 Ops.push_back(upgradeLoopArgument(MD));
6908
6909 return MDTuple::get(T->getContext(), Ops);
6910}
6911
6913 Triple T(TT);
6914 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6915 // the address space of globals to 1. This does not apply to SPIRV Logical.
6916 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6917 !DL.contains("-G") && !DL.starts_with("G")) {
6918 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6919 }
6920
6921 if (T.isLoongArch64() || T.isRISCV64()) {
6922 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6923 auto I = DL.find("-n64-");
6924 if (I != StringRef::npos)
6925 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6926 return DL.str();
6927 }
6928
6929 // AMDGPU data layout upgrades.
6930 std::string Res = DL.str();
6931 if (T.isAMDGPU()) {
6932 // Define address spaces for constants.
6933 if (!DL.contains("-G") && !DL.starts_with("G"))
6934 Res.append(Res.empty() ? "G1" : "-G1");
6935
6936 // AMDGCN data layout upgrades.
6937 if (T.isAMDGCN()) {
6938
6939 // Add missing non-integral declarations.
6940 // This goes before adding new address spaces to prevent incoherent string
6941 // values.
6942 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6943 Res.append("-ni:7:8:9");
6944 // Update ni:7 to ni:7:8:9.
6945 if (DL.ends_with("ni:7"))
6946 Res.append(":8:9");
6947 if (DL.ends_with("ni:7:8"))
6948 Res.append(":9");
6949
6950 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6951 // resources) An empty data layout has already been upgraded to G1 by now.
6952 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6953 Res.append("-p7:160:256:256:32");
6954 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6955 Res.append("-p8:128:128:128:48");
6956 constexpr StringRef OldP8("-p8:128:128-");
6957 if (DL.contains(OldP8))
6958 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6959 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6960 Res.append("-p9:192:256:256:32");
6961 }
6962
6963 // Upgrade the ELF mangling mode.
6964 if (!DL.contains("m:e"))
6965 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6966
6967 return Res;
6968 }
6969
6970 if (T.isSystemZ() && !DL.empty()) {
6971 // Make sure the stack alignment is present.
6972 if (!DL.contains("-S64"))
6973 return "E-S64" + DL.drop_front(1).str();
6974 return DL.str();
6975 }
6976
6977 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6978 // If the datalayout matches the expected format, add pointer size address
6979 // spaces to the datalayout.
6980 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6981 if (!DL.contains(AddrSpaces)) {
6983 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6984 if (R.match(Res, &Groups))
6985 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6986 }
6987 };
6988
6989 // AArch64 data layout upgrades.
6990 if (T.isAArch64()) {
6991 // Add "-Fn32"
6992 if (!DL.empty() && !DL.contains("-Fn32"))
6993 Res.append("-Fn32");
6994 AddPtr32Ptr64AddrSpaces();
6995 return Res;
6996 }
6997
6998 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6999 T.isWasm()) {
7000 // Mips64 with o32 ABI did not add "-i128:128".
7001 // Add "-i128:128"
7002 std::string I64 = "-i64:64";
7003 std::string I128 = "-i128:128";
7004 if (!StringRef(Res).contains(I128)) {
7005 size_t Pos = Res.find(I64);
7006 if (Pos != size_t(-1))
7007 Res.insert(Pos + I64.size(), I128);
7008 }
7009 }
7010
7011 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
7012 size_t Pos = Res.find("-S128");
7013 if (Pos == StringRef::npos)
7014 Pos = Res.size();
7015 Res.insert(Pos, "-f64:32:64");
7016 }
7017
7018 if (!T.isX86())
7019 return Res;
7020
7021 AddPtr32Ptr64AddrSpaces();
7022
7023 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
7024 // for i128 operations prior to this being reflected in the data layout, and
7025 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
7026 // boundaries, so although this is a breaking change, the upgrade is expected
7027 // to fix more IR than it breaks.
7028 // Intel MCU is an exception and uses 4-byte-alignment.
7029 if (!T.isOSIAMCU()) {
7030 std::string I128 = "-i128:128";
7031 if (StringRef Ref = Res; !Ref.contains(I128)) {
7033 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
7034 if (R.match(Res, &Groups))
7035 Res = (Groups[1] + I128 + Groups[3]).str();
7036 }
7037 }
7038
7039 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
7040 // Raising the alignment is safe because Clang did not produce f80 values in
7041 // the MSVC environment before this upgrade was added.
7042 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
7043 StringRef Ref = Res;
7044 auto I = Ref.find("-f80:32-");
7045 if (I != StringRef::npos)
7046 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
7047 }
7048
7049 return Res;
7050}
7051
7052void llvm::UpgradeAttributes(AttrBuilder &B) {
7053 StringRef FramePointer;
7054 Attribute A = B.getAttribute("no-frame-pointer-elim");
7055 if (A.isValid()) {
7056 // The value can be "true" or "false".
7057 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
7058 B.removeAttribute("no-frame-pointer-elim");
7059 }
7060 if (B.contains("no-frame-pointer-elim-non-leaf")) {
7061 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
7062 if (FramePointer != "all")
7063 FramePointer = "non-leaf";
7064 B.removeAttribute("no-frame-pointer-elim-non-leaf");
7065 }
7066 if (!FramePointer.empty())
7067 B.addAttribute("frame-pointer", FramePointer);
7068
7069 A = B.getAttribute("null-pointer-is-valid");
7070 if (A.isValid()) {
7071 // The value can be "true" or "false".
7072 bool NullPointerIsValid = A.getValueAsString() == "true";
7073 B.removeAttribute("null-pointer-is-valid");
7074 if (NullPointerIsValid)
7075 B.addAttribute(Attribute::NullPointerIsValid);
7076 }
7077
7078 A = B.getAttribute("uniform-work-group-size");
7079 if (A.isValid()) {
7080 StringRef Val = A.getValueAsString();
7081 if (!Val.empty()) {
7082 bool IsTrue = Val == "true";
7083 B.removeAttribute("uniform-work-group-size");
7084 if (IsTrue)
7085 B.addAttribute("uniform-work-group-size");
7086 }
7087 }
7088}
7089
7090void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
7091 // clang.arc.attachedcall bundles are now required to have an operand.
7092 // If they don't, it's okay to drop them entirely: when there is an operand,
7093 // the "attachedcall" is meaningful and required, but without an operand,
7094 // it's just a marker NOP. Dropping it merely prevents an optimization.
7095 erase_if(Bundles, [&](OperandBundleDef &OBD) {
7096 return OBD.getTag() == "clang.arc.attachedcall" &&
7097 OBD.inputs().empty();
7098 });
7099}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static bool upgradeIntrinsicDeclWithDefaultArgs(Function *F, Function *&NewFn)
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static bool upgradeIntrinsicCallWithDefaultArgs(CallBase *CI, Function *NewFn, IRBuilder<> &Builder)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define R2(n)
This file contains the declarations for metadata subclasses.
#define T
#define T1
NVPTX address space definition.
uint64_t High
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
void setDebugLoc(DebugLoc Loc)
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setApproxFunc(bool B=true)
Definition FMF.h:93
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:867
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
const Function & getFunction() const
Definition Function.h:166
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Function.cpp:444
size_t arg_size() const
Definition Function.h:875
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
Argument * getArg(unsigned i) const
Definition Function.h:860
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:80
LinkageTypes getLinkage() const
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
static StringRef dropLLVMManglingEscape(StringRef Name)
If the given string begins with the GlobalValue name mangling escape character '\1',...
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:577
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1426
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1554
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1432
LLVMContext & getContext() const
Definition Metadata.h:1233
Tracking metadata reference owned by Metadata.
Definition Metadata.h:891
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1511
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition Module.h:138
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition Module.h:120
@ Min
Takes the min of the two values, which are required to be integers.
Definition Module.h:152
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
A tuple of MDNodes.
Definition Metadata.h:1742
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
Definition Metadata.h:1838
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
StringRef getTag() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:889
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
Definition SmallString.h:26
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs with the length computed at compile time.
Definition StringRef.h:888
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:736
static constexpr size_t npos
Definition StringRef.h:58
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:597
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:635
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:850
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:477
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:308
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:284
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:394
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:346
bool hasName() const
Definition Value.h:261
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI ID lookupIntrinsicID(StringRef Name)
This does the actual lookup of an intrinsic ID which matches the given function name.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
LLVM_ABI std::pair< unsigned, ArrayRef< uint64_t > > getAllDefaultArgValues(ID IID)
Returns the first default argument index and an ArrayRef of all default values for the trailing param...
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:573
@ Length
Definition DWP.cpp:573
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call to the specified new function.
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
Definition Casting.h:732
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:633
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI bool UpgradeCFIFunctionsMetadata(Module &M)
Upgrade the cfi.functions metadata node by calculating and inserting the GUID for each function entry...
LLVM_ABI void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number; if it is outdated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition Metadata.h:54
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106