// LLVM 23.0.0git — AutoUpgrade.cpp
// (Navigation header left over from the doxygen capture of this file;
// not part of the original source.)
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>
58
59using namespace llvm;
60
61static cl::opt<bool>
62 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
63 cl::desc("Disable autoupgrade of debug info"));
64
65static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
66
67// Report a fatal error along with the
68// Call Instruction which caused the error
69[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
70 CallBase *CI) {
71 CI->print(llvm::errs());
72 llvm::errs() << "\n";
74}
75
76// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
77// changed their type from v4f32 to v2i64.
79 Function *&NewFn) {
80 // Check whether this is an old version of the function, which received
81 // v4f32 arguments.
82 Type *Arg0Type = F->getFunctionType()->getParamType(0);
83 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
84 return false;
85
86 // Yes, it's old, replace it with new version.
87 rename(F);
88 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
89 return true;
90}
91
92// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
93// arguments have changed their type from i32 to i8.
95 Function *&NewFn) {
96 // Check that the last argument is an i32.
97 Type *LastArgType = F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
99 if (!LastArgType->isIntegerTy(32))
100 return false;
101
102 // Move this function aside and map down.
103 rename(F);
104 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
105 return true;
106}
107
108// Upgrade the declaration of fp compare intrinsics that change return type
109// from scalar to vXi1 mask.
111 Function *&NewFn) {
112 // Check if the return type is a vector.
113 if (F->getReturnType()->isVectorTy())
114 return false;
115
116 rename(F);
117 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
118 return true;
119}
120
121// Upgrade the declaration of multiply and add bytes intrinsics whose input
122// arguments' types have changed from vectors of i32 to vectors of i8
124 Function *&NewFn) {
125 // check if input argument type is a vector of i8
126 Type *Arg1Type = F->getFunctionType()->getParamType(1);
127 Type *Arg2Type = F->getFunctionType()->getParamType(2);
128 if (Arg1Type->isVectorTy() &&
129 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
130 Arg2Type->isVectorTy() &&
131 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
132 return false;
133
134 rename(F);
135 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
136 return true;
137}
138
139// Upgrade the declaration of multipy and add words intrinsics whose input
140// arguments' types have changed to vectors of i32 to vectors of i16
142 Function *&NewFn) {
143 // check if input argument type is a vector of i16
144 Type *Arg1Type = F->getFunctionType()->getParamType(1);
145 Type *Arg2Type = F->getFunctionType()->getParamType(2);
146 if (Arg1Type->isVectorTy() &&
147 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
148 Arg2Type->isVectorTy() &&
149 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
150 return false;
151
152 rename(F);
153 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
154 return true;
155}
156
158 Function *&NewFn) {
159 if (F->getReturnType()->getScalarType()->isBFloatTy())
160 return false;
161
162 rename(F);
163 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
164 return true;
165}
166
168 Function *&NewFn) {
169 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
170 return false;
171
172 rename(F);
173 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
174 return true;
175}
176
178 // All of the intrinsics matches below should be marked with which llvm
179 // version started autoupgrading them. At some point in the future we would
180 // like to use this information to remove upgrade code for some older
181 // intrinsics. It is currently undecided how we will determine that future
182 // point.
183 if (Name.consume_front("avx."))
184 return (Name.starts_with("blend.p") || // Added in 3.7
185 Name == "cvt.ps2.pd.256" || // Added in 3.9
186 Name == "cvtdq2.pd.256" || // Added in 3.9
187 Name == "cvtdq2.ps.256" || // Added in 7.0
188 Name.starts_with("movnt.") || // Added in 3.2
189 Name.starts_with("sqrt.p") || // Added in 7.0
190 Name.starts_with("storeu.") || // Added in 3.9
191 Name.starts_with("vbroadcast.s") || // Added in 3.5
192 Name.starts_with("vbroadcastf128") || // Added in 4.0
193 Name.starts_with("vextractf128.") || // Added in 3.7
194 Name.starts_with("vinsertf128.") || // Added in 3.7
195 Name.starts_with("vperm2f128.") || // Added in 6.0
196 Name.starts_with("vpermil.")); // Added in 3.1
197
198 if (Name.consume_front("avx2."))
199 return (Name == "movntdqa" || // Added in 5.0
200 Name.starts_with("pabs.") || // Added in 6.0
201 Name.starts_with("padds.") || // Added in 8.0
202 Name.starts_with("paddus.") || // Added in 8.0
203 Name.starts_with("pblendd.") || // Added in 3.7
204 Name == "pblendw" || // Added in 3.7
205 Name.starts_with("pbroadcast") || // Added in 3.8
206 Name.starts_with("pcmpeq.") || // Added in 3.1
207 Name.starts_with("pcmpgt.") || // Added in 3.1
208 Name.starts_with("pmax") || // Added in 3.9
209 Name.starts_with("pmin") || // Added in 3.9
210 Name.starts_with("pmovsx") || // Added in 3.9
211 Name.starts_with("pmovzx") || // Added in 3.9
212 Name == "pmul.dq" || // Added in 7.0
213 Name == "pmulu.dq" || // Added in 7.0
214 Name.starts_with("psll.dq") || // Added in 3.7
215 Name.starts_with("psrl.dq") || // Added in 3.7
216 Name.starts_with("psubs.") || // Added in 8.0
217 Name.starts_with("psubus.") || // Added in 8.0
218 Name.starts_with("vbroadcast") || // Added in 3.8
219 Name == "vbroadcasti128" || // Added in 3.7
220 Name == "vextracti128" || // Added in 3.7
221 Name == "vinserti128" || // Added in 3.7
222 Name == "vperm2i128"); // Added in 6.0
223
224 if (Name.consume_front("avx512.")) {
225 if (Name.consume_front("mask."))
226 // 'avx512.mask.*'
227 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
228 Name.starts_with("and.") || // Added in 3.9
229 Name.starts_with("andn.") || // Added in 3.9
230 Name.starts_with("broadcast.s") || // Added in 3.9
231 Name.starts_with("broadcastf32x4.") || // Added in 6.0
232 Name.starts_with("broadcastf32x8.") || // Added in 6.0
233 Name.starts_with("broadcastf64x2.") || // Added in 6.0
234 Name.starts_with("broadcastf64x4.") || // Added in 6.0
235 Name.starts_with("broadcasti32x4.") || // Added in 6.0
236 Name.starts_with("broadcasti32x8.") || // Added in 6.0
237 Name.starts_with("broadcasti64x2.") || // Added in 6.0
238 Name.starts_with("broadcasti64x4.") || // Added in 6.0
239 Name.starts_with("cmp.b") || // Added in 5.0
240 Name.starts_with("cmp.d") || // Added in 5.0
241 Name.starts_with("cmp.q") || // Added in 5.0
242 Name.starts_with("cmp.w") || // Added in 5.0
243 Name.starts_with("compress.b") || // Added in 9.0
244 Name.starts_with("compress.d") || // Added in 9.0
245 Name.starts_with("compress.p") || // Added in 9.0
246 Name.starts_with("compress.q") || // Added in 9.0
247 Name.starts_with("compress.store.") || // Added in 7.0
248 Name.starts_with("compress.w") || // Added in 9.0
249 Name.starts_with("conflict.") || // Added in 9.0
250 Name.starts_with("cvtdq2pd.") || // Added in 4.0
251 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
252 Name == "cvtpd2dq.256" || // Added in 7.0
253 Name == "cvtpd2ps.256" || // Added in 7.0
254 Name == "cvtps2pd.128" || // Added in 7.0
255 Name == "cvtps2pd.256" || // Added in 7.0
256 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
257 Name == "cvtqq2ps.256" || // Added in 9.0
258 Name == "cvtqq2ps.512" || // Added in 9.0
259 Name == "cvttpd2dq.256" || // Added in 7.0
260 Name == "cvttps2dq.128" || // Added in 7.0
261 Name == "cvttps2dq.256" || // Added in 7.0
262 Name.starts_with("cvtudq2pd.") || // Added in 4.0
263 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
264 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
265 Name == "cvtuqq2ps.256" || // Added in 9.0
266 Name == "cvtuqq2ps.512" || // Added in 9.0
267 Name.starts_with("dbpsadbw.") || // Added in 7.0
268 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
269 Name.starts_with("expand.b") || // Added in 9.0
270 Name.starts_with("expand.d") || // Added in 9.0
271 Name.starts_with("expand.load.") || // Added in 7.0
272 Name.starts_with("expand.p") || // Added in 9.0
273 Name.starts_with("expand.q") || // Added in 9.0
274 Name.starts_with("expand.w") || // Added in 9.0
275 Name.starts_with("fpclass.p") || // Added in 7.0
276 Name.starts_with("insert") || // Added in 4.0
277 Name.starts_with("load.") || // Added in 3.9
278 Name.starts_with("loadu.") || // Added in 3.9
279 Name.starts_with("lzcnt.") || // Added in 5.0
280 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
281 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
282 Name.starts_with("movddup") || // Added in 3.9
283 Name.starts_with("move.s") || // Added in 4.0
284 Name.starts_with("movshdup") || // Added in 3.9
285 Name.starts_with("movsldup") || // Added in 3.9
286 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
287 Name.starts_with("or.") || // Added in 3.9
288 Name.starts_with("pabs.") || // Added in 6.0
289 Name.starts_with("packssdw.") || // Added in 5.0
290 Name.starts_with("packsswb.") || // Added in 5.0
291 Name.starts_with("packusdw.") || // Added in 5.0
292 Name.starts_with("packuswb.") || // Added in 5.0
293 Name.starts_with("padd.") || // Added in 4.0
294 Name.starts_with("padds.") || // Added in 8.0
295 Name.starts_with("paddus.") || // Added in 8.0
296 Name.starts_with("palignr.") || // Added in 3.9
297 Name.starts_with("pand.") || // Added in 3.9
298 Name.starts_with("pandn.") || // Added in 3.9
299 Name.starts_with("pavg") || // Added in 6.0
300 Name.starts_with("pbroadcast") || // Added in 6.0
301 Name.starts_with("pcmpeq.") || // Added in 3.9
302 Name.starts_with("pcmpgt.") || // Added in 3.9
303 Name.starts_with("perm.df.") || // Added in 3.9
304 Name.starts_with("perm.di.") || // Added in 3.9
305 Name.starts_with("permvar.") || // Added in 7.0
306 Name.starts_with("pmaddubs.w.") || // Added in 7.0
307 Name.starts_with("pmaddw.d.") || // Added in 7.0
308 Name.starts_with("pmax") || // Added in 4.0
309 Name.starts_with("pmin") || // Added in 4.0
310 Name == "pmov.qd.256" || // Added in 9.0
311 Name == "pmov.qd.512" || // Added in 9.0
312 Name == "pmov.wb.256" || // Added in 9.0
313 Name == "pmov.wb.512" || // Added in 9.0
314 Name.starts_with("pmovsx") || // Added in 4.0
315 Name.starts_with("pmovzx") || // Added in 4.0
316 Name.starts_with("pmul.dq.") || // Added in 4.0
317 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
318 Name.starts_with("pmulh.w.") || // Added in 7.0
319 Name.starts_with("pmulhu.w.") || // Added in 7.0
320 Name.starts_with("pmull.") || // Added in 4.0
321 Name.starts_with("pmultishift.qb.") || // Added in 8.0
322 Name.starts_with("pmulu.dq.") || // Added in 4.0
323 Name.starts_with("por.") || // Added in 3.9
324 Name.starts_with("prol.") || // Added in 8.0
325 Name.starts_with("prolv.") || // Added in 8.0
326 Name.starts_with("pror.") || // Added in 8.0
327 Name.starts_with("prorv.") || // Added in 8.0
328 Name.starts_with("pshuf.b.") || // Added in 4.0
329 Name.starts_with("pshuf.d.") || // Added in 3.9
330 Name.starts_with("pshufh.w.") || // Added in 3.9
331 Name.starts_with("pshufl.w.") || // Added in 3.9
332 Name.starts_with("psll.d") || // Added in 4.0
333 Name.starts_with("psll.q") || // Added in 4.0
334 Name.starts_with("psll.w") || // Added in 4.0
335 Name.starts_with("pslli") || // Added in 4.0
336 Name.starts_with("psllv") || // Added in 4.0
337 Name.starts_with("psra.d") || // Added in 4.0
338 Name.starts_with("psra.q") || // Added in 4.0
339 Name.starts_with("psra.w") || // Added in 4.0
340 Name.starts_with("psrai") || // Added in 4.0
341 Name.starts_with("psrav") || // Added in 4.0
342 Name.starts_with("psrl.d") || // Added in 4.0
343 Name.starts_with("psrl.q") || // Added in 4.0
344 Name.starts_with("psrl.w") || // Added in 4.0
345 Name.starts_with("psrli") || // Added in 4.0
346 Name.starts_with("psrlv") || // Added in 4.0
347 Name.starts_with("psub.") || // Added in 4.0
348 Name.starts_with("psubs.") || // Added in 8.0
349 Name.starts_with("psubus.") || // Added in 8.0
350 Name.starts_with("pternlog.") || // Added in 7.0
351 Name.starts_with("punpckh") || // Added in 3.9
352 Name.starts_with("punpckl") || // Added in 3.9
353 Name.starts_with("pxor.") || // Added in 3.9
354 Name.starts_with("shuf.f") || // Added in 6.0
355 Name.starts_with("shuf.i") || // Added in 6.0
356 Name.starts_with("shuf.p") || // Added in 4.0
357 Name.starts_with("sqrt.p") || // Added in 7.0
358 Name.starts_with("store.b.") || // Added in 3.9
359 Name.starts_with("store.d.") || // Added in 3.9
360 Name.starts_with("store.p") || // Added in 3.9
361 Name.starts_with("store.q.") || // Added in 3.9
362 Name.starts_with("store.w.") || // Added in 3.9
363 Name == "store.ss" || // Added in 7.0
364 Name.starts_with("storeu.") || // Added in 3.9
365 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
366 Name.starts_with("ucmp.") || // Added in 5.0
367 Name.starts_with("unpckh.") || // Added in 3.9
368 Name.starts_with("unpckl.") || // Added in 3.9
369 Name.starts_with("valign.") || // Added in 4.0
370 Name == "vcvtph2ps.128" || // Added in 11.0
371 Name == "vcvtph2ps.256" || // Added in 11.0
372 Name.starts_with("vextract") || // Added in 4.0
373 Name.starts_with("vfmadd.") || // Added in 7.0
374 Name.starts_with("vfmaddsub.") || // Added in 7.0
375 Name.starts_with("vfnmadd.") || // Added in 7.0
376 Name.starts_with("vfnmsub.") || // Added in 7.0
377 Name.starts_with("vpdpbusd.") || // Added in 7.0
378 Name.starts_with("vpdpbusds.") || // Added in 7.0
379 Name.starts_with("vpdpwssd.") || // Added in 7.0
380 Name.starts_with("vpdpwssds.") || // Added in 7.0
381 Name.starts_with("vpermi2var.") || // Added in 7.0
382 Name.starts_with("vpermil.p") || // Added in 3.9
383 Name.starts_with("vpermilvar.") || // Added in 4.0
384 Name.starts_with("vpermt2var.") || // Added in 7.0
385 Name.starts_with("vpmadd52") || // Added in 7.0
386 Name.starts_with("vpshld.") || // Added in 7.0
387 Name.starts_with("vpshldv.") || // Added in 8.0
388 Name.starts_with("vpshrd.") || // Added in 7.0
389 Name.starts_with("vpshrdv.") || // Added in 8.0
390 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
391 Name.starts_with("xor.")); // Added in 3.9
392
393 if (Name.consume_front("mask3."))
394 // 'avx512.mask3.*'
395 return (Name.starts_with("vfmadd.") || // Added in 7.0
396 Name.starts_with("vfmaddsub.") || // Added in 7.0
397 Name.starts_with("vfmsub.") || // Added in 7.0
398 Name.starts_with("vfmsubadd.") || // Added in 7.0
399 Name.starts_with("vfnmsub.")); // Added in 7.0
400
401 if (Name.consume_front("maskz."))
402 // 'avx512.maskz.*'
403 return (Name.starts_with("pternlog.") || // Added in 7.0
404 Name.starts_with("vfmadd.") || // Added in 7.0
405 Name.starts_with("vfmaddsub.") || // Added in 7.0
406 Name.starts_with("vpdpbusd.") || // Added in 7.0
407 Name.starts_with("vpdpbusds.") || // Added in 7.0
408 Name.starts_with("vpdpwssd.") || // Added in 7.0
409 Name.starts_with("vpdpwssds.") || // Added in 7.0
410 Name.starts_with("vpermt2var.") || // Added in 7.0
411 Name.starts_with("vpmadd52") || // Added in 7.0
412 Name.starts_with("vpshldv.") || // Added in 8.0
413 Name.starts_with("vpshrdv.")); // Added in 8.0
414
415 // 'avx512.*'
416 return (Name == "movntdqa" || // Added in 5.0
417 Name == "pmul.dq.512" || // Added in 7.0
418 Name == "pmulu.dq.512" || // Added in 7.0
419 Name.starts_with("broadcastm") || // Added in 6.0
420 Name.starts_with("cmp.p") || // Added in 12.0
421 Name.starts_with("cvtb2mask.") || // Added in 7.0
422 Name.starts_with("cvtd2mask.") || // Added in 7.0
423 Name.starts_with("cvtmask2") || // Added in 5.0
424 Name.starts_with("cvtq2mask.") || // Added in 7.0
425 Name == "cvtusi2sd" || // Added in 7.0
426 Name.starts_with("cvtw2mask.") || // Added in 7.0
427 Name == "kand.w" || // Added in 7.0
428 Name == "kandn.w" || // Added in 7.0
429 Name == "knot.w" || // Added in 7.0
430 Name == "kor.w" || // Added in 7.0
431 Name == "kortestc.w" || // Added in 7.0
432 Name == "kortestz.w" || // Added in 7.0
433 Name.starts_with("kunpck") || // added in 6.0
434 Name == "kxnor.w" || // Added in 7.0
435 Name == "kxor.w" || // Added in 7.0
436 Name.starts_with("padds.") || // Added in 8.0
437 Name.starts_with("pbroadcast") || // Added in 3.9
438 Name.starts_with("prol") || // Added in 8.0
439 Name.starts_with("pror") || // Added in 8.0
440 Name.starts_with("psll.dq") || // Added in 3.9
441 Name.starts_with("psrl.dq") || // Added in 3.9
442 Name.starts_with("psubs.") || // Added in 8.0
443 Name.starts_with("ptestm") || // Added in 6.0
444 Name.starts_with("ptestnm") || // Added in 6.0
445 Name.starts_with("storent.") || // Added in 3.9
446 Name.starts_with("vbroadcast.s") || // Added in 7.0
447 Name.starts_with("vpshld.") || // Added in 8.0
448 Name.starts_with("vpshrd.")); // Added in 8.0
449 }
450
451 if (Name.consume_front("fma."))
452 return (Name.starts_with("vfmadd.") || // Added in 7.0
453 Name.starts_with("vfmsub.") || // Added in 7.0
454 Name.starts_with("vfmsubadd.") || // Added in 7.0
455 Name.starts_with("vfnmadd.") || // Added in 7.0
456 Name.starts_with("vfnmsub.")); // Added in 7.0
457
458 if (Name.consume_front("fma4."))
459 return Name.starts_with("vfmadd.s"); // Added in 7.0
460
461 if (Name.consume_front("sse."))
462 return (Name == "add.ss" || // Added in 4.0
463 Name == "cvtsi2ss" || // Added in 7.0
464 Name == "cvtsi642ss" || // Added in 7.0
465 Name == "div.ss" || // Added in 4.0
466 Name == "mul.ss" || // Added in 4.0
467 Name.starts_with("sqrt.p") || // Added in 7.0
468 Name == "sqrt.ss" || // Added in 7.0
469 Name.starts_with("storeu.") || // Added in 3.9
470 Name == "sub.ss"); // Added in 4.0
471
472 if (Name.consume_front("sse2."))
473 return (Name == "add.sd" || // Added in 4.0
474 Name == "cvtdq2pd" || // Added in 3.9
475 Name == "cvtdq2ps" || // Added in 7.0
476 Name == "cvtps2pd" || // Added in 3.9
477 Name == "cvtsi2sd" || // Added in 7.0
478 Name == "cvtsi642sd" || // Added in 7.0
479 Name == "cvtss2sd" || // Added in 7.0
480 Name == "div.sd" || // Added in 4.0
481 Name == "mul.sd" || // Added in 4.0
482 Name.starts_with("padds.") || // Added in 8.0
483 Name.starts_with("paddus.") || // Added in 8.0
484 Name.starts_with("pcmpeq.") || // Added in 3.1
485 Name.starts_with("pcmpgt.") || // Added in 3.1
486 Name == "pmaxs.w" || // Added in 3.9
487 Name == "pmaxu.b" || // Added in 3.9
488 Name == "pmins.w" || // Added in 3.9
489 Name == "pminu.b" || // Added in 3.9
490 Name == "pmulu.dq" || // Added in 7.0
491 Name.starts_with("pshuf") || // Added in 3.9
492 Name.starts_with("psll.dq") || // Added in 3.7
493 Name.starts_with("psrl.dq") || // Added in 3.7
494 Name.starts_with("psubs.") || // Added in 8.0
495 Name.starts_with("psubus.") || // Added in 8.0
496 Name.starts_with("sqrt.p") || // Added in 7.0
497 Name == "sqrt.sd" || // Added in 7.0
498 Name == "storel.dq" || // Added in 3.9
499 Name.starts_with("storeu.") || // Added in 3.9
500 Name == "sub.sd"); // Added in 4.0
501
502 if (Name.consume_front("sse41."))
503 return (Name.starts_with("blendp") || // Added in 3.7
504 Name == "movntdqa" || // Added in 5.0
505 Name == "pblendw" || // Added in 3.7
506 Name == "pmaxsb" || // Added in 3.9
507 Name == "pmaxsd" || // Added in 3.9
508 Name == "pmaxud" || // Added in 3.9
509 Name == "pmaxuw" || // Added in 3.9
510 Name == "pminsb" || // Added in 3.9
511 Name == "pminsd" || // Added in 3.9
512 Name == "pminud" || // Added in 3.9
513 Name == "pminuw" || // Added in 3.9
514 Name.starts_with("pmovsx") || // Added in 3.8
515 Name.starts_with("pmovzx") || // Added in 3.9
516 Name == "pmuldq"); // Added in 7.0
517
518 if (Name.consume_front("sse42."))
519 return Name == "crc32.64.8"; // Added in 3.4
520
521 if (Name.consume_front("sse4a."))
522 return Name.starts_with("movnt."); // Added in 3.9
523
524 if (Name.consume_front("ssse3."))
525 return (Name == "pabs.b.128" || // Added in 6.0
526 Name == "pabs.d.128" || // Added in 6.0
527 Name == "pabs.w.128"); // Added in 6.0
528
529 if (Name.consume_front("xop."))
530 return (Name == "vpcmov" || // Added in 3.8
531 Name == "vpcmov.256" || // Added in 5.0
532 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
533 Name.starts_with("vprot")); // Added in 8.0
534
535 return (Name == "addcarry.u32" || // Added in 8.0
536 Name == "addcarry.u64" || // Added in 8.0
537 Name == "addcarryx.u32" || // Added in 8.0
538 Name == "addcarryx.u64" || // Added in 8.0
539 Name == "subborrow.u32" || // Added in 8.0
540 Name == "subborrow.u64" || // Added in 8.0
541 Name.starts_with("vcvtph2ps.")); // Added in 11.0
542}
543
545 Function *&NewFn) {
546 // Only handle intrinsics that start with "x86.".
547 if (!Name.consume_front("x86."))
548 return false;
549
550 if (shouldUpgradeX86Intrinsic(F, Name)) {
551 NewFn = nullptr;
552 return true;
553 }
554
555 if (Name == "rdtscp") { // Added in 8.0
556 // If this intrinsic has 0 operands, it's the new version.
557 if (F->getFunctionType()->getNumParams() == 0)
558 return false;
559
560 rename(F);
561 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
562 Intrinsic::x86_rdtscp);
563 return true;
564 }
565
567
568 // SSE4.1 ptest functions may have an old signature.
569 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
571 .Case("c", Intrinsic::x86_sse41_ptestc)
572 .Case("z", Intrinsic::x86_sse41_ptestz)
573 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
576 return upgradePTESTIntrinsic(F, ID, NewFn);
577
578 return false;
579 }
580
581 // Several blend and other instructions with masks used the wrong number of
582 // bits.
583
584 // Added in 3.6
586 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
594 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
595
596 if (Name.consume_front("avx512.")) {
597 if (Name.consume_front("mask.cmp.")) {
598 // Added in 7.0
600 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
608 return upgradeX86MaskedFPCompare(F, ID, NewFn);
609 } else if (Name.starts_with("vpdpbusd.") ||
610 Name.starts_with("vpdpbusds.")) {
611 // Added in 21.1
613 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
621 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
622 } else if (Name.starts_with("vpdpwssd.") ||
623 Name.starts_with("vpdpwssds.")) {
624 // Added in 21.1
626 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
634 return upgradeX86MultiplyAddWords(F, ID, NewFn);
635 }
636 return false; // No other 'x86.avx512.*'.
637 }
638
639 if (Name.consume_front("avx2.")) {
640 if (Name.consume_front("vpdpb")) {
641 // Added in 21.1
643 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
657 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
658 } else if (Name.consume_front("vpdpw")) {
659 // Added in 21.1
661 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
675 return upgradeX86MultiplyAddWords(F, ID, NewFn);
676 }
677 return false; // No other 'x86.avx2.*'
678 }
679
680 if (Name.consume_front("avx10.")) {
681 if (Name.consume_front("vpdpb")) {
682 // Added in 21.1
684 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
692 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
693 } else if (Name.consume_front("vpdpw")) {
695 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
703 return upgradeX86MultiplyAddWords(F, ID, NewFn);
704 }
705 return false; // No other 'x86.avx10.*'
706 }
707
708 if (Name.consume_front("avx512bf16.")) {
709 // Added in 9.0
711 .Case("cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .Case("cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .Case("cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .Case("mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .Case("cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .Case("cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
725 return upgradeX86BF16Intrinsic(F, ID, NewFn);
726
727 // Added in 9.0
729 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
734 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
735 return false; // No other 'x86.avx512bf16.*'.
736 }
737
738 if (Name.consume_front("xop.")) {
740 if (Name.starts_with("vpermil2")) { // Added in 3.9
741 // Upgrade any XOP PERMIL2 index operand still using a float/double
742 // vector.
743 auto Idx = F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
753 else
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
755 }
756 } else if (F->arg_size() == 2)
757 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
759 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
762
764 rename(F);
765 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
766 return true;
767 }
768 return false; // No other 'x86.xop.*'
769 }
770
771 if (Name == "seh.recoverfp") {
772 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
773 Intrinsic::eh_recoverfp);
774 return true;
775 }
776
777 return false;
778}
779
// Upgrade a deprecated 'arm.*' (IsArm) or 'aarch64.*' (!IsArm) intrinsic
// declaration. On a match this either sets NewFn to the replacement
// declaration, or (for cases rewritten entirely at the call site, e.g. MVE
// vctp64 and the neon bfcvt family) leaves the call rewrite to
// UpgradeIntrinsicCall and just returns true. Returns false when Name does
// not need upgrading. NOTE(review): several StringSwitch headers and
// .Default(...) lines are not visible in this extract — the mapping logic
// below is read from the .Case bodies only.
 780// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
 781// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
 783 StringRef Name,
 784 Function *&NewFn) {
 785 if (Name.starts_with("rbit")) {
 786 // '(arm|aarch64).rbit'.
// Target rbit becomes the generic llvm.bitreverse, overloaded on the
// first argument's type.
 788 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
 789 return true;
 790 }
 791
 792 if (Name == "thread.pointer") {
 793 // '(arm|aarch64).thread.pointer'.
 795 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
 796 return true;
 797 }
 798
 799 bool Neon = Name.consume_front("neon.");
 800 if (Neon) {
 801 // '(arm|aarch64).neon.*'.
 802 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
 803 // v16i8 respectively.
 804 if (Name.consume_front("bfdot.")) {
 805 // (arm|aarch64).neon.bfdot.*'.
 808 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
 809 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
 810 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
// The return type's bit width (64 or 128) determines how many bf16
// lanes the second overload type gets (width / 16 elements).
 813 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
 814 assert((OperandWidth == 64 || OperandWidth == 128) &&
 815 "Unexpected operand width");
 816 LLVMContext &Ctx = F->getParent()->getContext();
 817 std::array<Type *, 2> Tys{
 818 {F->getReturnType(),
 819 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
 820 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
 821 return true;
 822 }
 823 return false; // No other '(arm|aarch64).neon.bfdot.*'.
 824 }
 825
 826 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
 827 // anymore and accept v8bf16 instead of v16i8.
 828 if (Name.consume_front("bfm")) {
 829 // (arm|aarch64).neon.bfm*'.
 830 if (Name.consume_back(".v4f32.v16i8")) {
 831 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
 834 .Case("mla",
 835 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
 836 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
 837 .Case("lalb",
 838 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
 839 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
 840 .Case("lalt",
 841 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
 842 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
 845 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
 846 return true;
 847 }
 848 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
 849 }
 850 return false; // No other '(arm|aarch64).neon.bfm*.
 851 }
 852 // Continue on to Aarch64 Neon or Arm Neon.
 853 }
 854 // Continue on to Arm or Aarch64.
 855
 856 if (IsArm) {
 857 // 'arm.*'.
 858 if (Neon) {
 859 // 'arm.neon.*'.
// Saturating/rounding NEON ops map 1:1 onto generic intrinsics,
// overloaded on the first argument's type.
 861 .StartsWith("vclz.", Intrinsic::ctlz)
 862 .StartsWith("vcnt.", Intrinsic::ctpop)
 863 .StartsWith("vqadds.", Intrinsic::sadd_sat)
 864 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
 865 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
 866 .StartsWith("vqsubu.", Intrinsic::usub_sat)
 867 .StartsWith("vrinta.", Intrinsic::round)
 868 .StartsWith("vrintn.", Intrinsic::roundeven)
 869 .StartsWith("vrintm.", Intrinsic::floor)
 870 .StartsWith("vrintp.", Intrinsic::ceil)
 871 .StartsWith("vrintx.", Intrinsic::rint)
 872 .StartsWith("vrintz.", Intrinsic::trunc)
 875 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
 876 F->arg_begin()->getType());
 877 return true;
 878 }
 879
 880 if (Name.consume_front("vst")) {
 881 // 'arm.neon.vst*'.
 882 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
 884 if (vstRegex.match(Name, &Groups)) {
 885 static const Intrinsic::ID StoreInts[] = {
 886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
 887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
 888
 889 static const Intrinsic::ID StoreLaneInts[] = {
 890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
 891 Intrinsic::arm_neon_vst4lane};
 892
 893 auto fArgs = F->getFunctionType()->params();
 894 Type *Tys[] = {fArgs[0], fArgs[1]};
// Groups[1] of length 1 means a plain "vstN"; otherwise "vstNlane".
// The arg count selects N: presumably vstN takes N+2 args and
// vstNlane takes N+4 — TODO confirm against the arm_neon_vstN defs.
 895 if (Groups[1].size() == 1)
 897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
 898 else
 900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
 901 return true;
 902 }
 903 return false; // No other 'arm.neon.vst*'.
 904 }
 905
 906 return false; // No other 'arm.neon.*'.
 907 }
 908
 909 if (Name.consume_front("mve.")) {
 910 // 'arm.mve.*'.
 911 if (Name == "vctp64") {
 912 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
 913 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
 914 // the function and deal with it below in UpgradeIntrinsicCall.
 915 rename(F);
 916 return true;
 917 }
 918 return false; // Not 'arm.mve.vctp64'.
 919 }
 920
 921 if (Name.starts_with("vrintn.v")) {
 923 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
 924 return true;
 925 }
 926
 927 // These too are changed to accept a v2i1 instead of the old v4i1.
// The remaining MVE cases only *detect* an upgrade (return true with
// NewFn untouched); the predicate-type rewrite happens at the call.
 928 if (Name.consume_back(".v4i1")) {
 929 // 'arm.mve.*.v4i1'.
 930 if (Name.consume_back(".predicated.v2i64.v4i32"))
 931 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
 932 return Name == "mull.int" || Name == "vqdmull";
 933
 934 if (Name.consume_back(".v2i64")) {
 935 // 'arm.mve.*.v2i64.v4i1'
 936 bool IsGather = Name.consume_front("vldr.gather.");
 937 if (IsGather || Name.consume_front("vstr.scatter.")) {
 938 if (Name.consume_front("base.")) {
 939 // Optional 'wb.' prefix.
 940 Name.consume_front("wb.");
 941 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
 942 // predicated.v2i64.v2i64.v4i1'.
 943 return Name == "predicated.v2i64";
 944 }
 945
 946 if (Name.consume_front("offset.predicated."))
 947 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
 948 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
 949
 950 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
 951 return false;
 952 }
 953
 954 return false; // No other 'arm.mve.*.v2i64.v4i1'.
 955 }
 956 return false; // No other 'arm.mve.*.v4i1'.
 957 }
 958 return false; // No other 'arm.mve.*'.
 959 }
 960
 961 if (Name.consume_front("cde.vcx")) {
 962 // 'arm.cde.vcx*'.
 963 if (Name.consume_back(".predicated.v2i64.v4i1"))
 964 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
 965 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
 966 Name == "3q" || Name == "3qa";
 967
 968 return false; // No other 'arm.cde.vcx*'.
 969 }
 970 } else {
 971 // 'aarch64.*'.
 972 if (Neon) {
 973 // 'aarch64.neon.*'.
 975 .StartsWith("frintn", Intrinsic::roundeven)
 976 .StartsWith("rbit", Intrinsic::bitreverse)
 979 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
 980 F->arg_begin()->getType())
 981 return true;
 982 }
 983
 984 if (Name.starts_with("addp")) {
 985 // 'aarch64.neon.addp*'.
 986 if (F->arg_size() != 2)
 987 return false; // Invalid IR.
// Only the floating-point addp variants migrate to faddp; integer
// addp falls through and is left alone.
 988 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
 989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
 991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
 992 return true;
 993 }
 994 }
 995
 996 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
 997 if (Name.starts_with("bfcvt")) {
 998 NewFn = nullptr;
 999 return true;
 1000 }
 1001
 1002 return false; // No other 'aarch64.neon.*'.
 1003 }
 1004 if (Name.consume_front("sve.")) {
 1005 // 'aarch64.sve.*'.
 1006 if (Name.consume_front("bf")) {
 1007 if (Name.consume_back(".lane")) {
 1008 // 'aarch64.sve.bf*.lane'.
 1011 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
 1012 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
 1013 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
 1016 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
 1017 return true;
 1018 }
 1019 return false; // No other 'aarch64.sve.bf*.lane'.
 1020 }
 1021 return false; // No other 'aarch64.sve.bf*'.
 1022 }
 1023
 1024 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
 1025 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
 1026 NewFn = nullptr;
 1027 return true;
 1028 }
 1029
 1030 if (Name.consume_front("addqv")) {
 1031 // 'aarch64.sve.addqv'.
 1032 if (!F->getReturnType()->isFPOrFPVectorTy())
 1033 return false;
 1034
 1035 auto Args = F->getFunctionType()->params();
 1036 Type *Tys[] = {F->getReturnType(), Args[1]};
 1038 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
 1039 return true;
 1040 }
 1041
 1042 if (Name.consume_front("ld")) {
 1043 // 'aarch64.sve.ld*'.
 1044 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
 1045 if (LdRegex.match(Name)) {
 1046 Type *ScalarTy =
 1047 cast<VectorType>(F->getReturnType())->getElementType();
 1048 ElementCount EC =
 1049 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
 1050 Type *Ty = VectorType::get(ScalarTy, EC);
 1051 static const Intrinsic::ID LoadIDs[] = {
 1052 Intrinsic::aarch64_sve_ld2_sret,
 1053 Intrinsic::aarch64_sve_ld3_sret,
 1054 Intrinsic::aarch64_sve_ld4_sret,
 1055 };
// Name[0] is the regex-matched '2', '3' or '4', so Name[0]-'2'
// indexes directly into LoadIDs.
 1056 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
 1057 LoadIDs[Name[0] - '2'], Ty);
 1058 return true;
 1059 }
 1060 return false; // No other 'aarch64.sve.ld*'.
 1061 }
 1062
 1063 if (Name.consume_front("tuple.")) {
 1064 // 'aarch64.sve.tuple.*'.
// Tuple get/set/create map onto the generic vector.extract /
// vector.insert intrinsics with the appropriate overload types.
 1065 if (Name.starts_with("get")) {
 1066 // 'aarch64.sve.tuple.get*'.
 1067 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
 1069 F->getParent(), Intrinsic::vector_extract, Tys);
 1070 return true;
 1071 }
 1072
 1073 if (Name.starts_with("set")) {
 1074 // 'aarch64.sve.tuple.set*'.
 1075 auto Args = F->getFunctionType()->params();
 1076 Type *Tys[] = {Args[0], Args[2], Args[1]};
 1078 F->getParent(), Intrinsic::vector_insert, Tys);
 1079 return true;
 1080 }
 1081
 1082 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
 1083 if (CreateTupleRegex.match(Name)) {
 1084 // 'aarch64.sve.tuple.create*'.
 1085 auto Args = F->getFunctionType()->params();
 1086 Type *Tys[] = {F->getReturnType(), Args[1]};
 1088 F->getParent(), Intrinsic::vector_insert, Tys);
 1089 return true;
 1090 }
 1091 return false; // No other 'aarch64.sve.tuple.*'.
 1092 }
 1093
 1094 if (Name.starts_with("rev.nxv")) {
 1095 // 'aarch64.sve.rev.<Ty>'
 1097 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
 1098 return true;
 1099 }
 1100
 1101 return false; // No other 'aarch64.sve.*'.
 1102 }
 1103 }
 1104 return false; // No other 'arm.*', 'aarch64.*'.
 1105}
1106
// Decide whether an 'nvvm.cp.async.bulk.tensor.g2s.*' declaration needs
// upgrading, returning the current Intrinsic::ID when it does. Per the
// comments below, an upgrade is needed either when the first argument's
// pointer address-space is the old shared space, or when the declaration
// still has the older two-boolean-flag tail (detected by the [N-3]rd
// parameter not being i1). (The function head and final return are not
// visible in this extract.)
 1108 StringRef Name) {
 1109 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
 1112 .Case("im2col.3d",
 1113 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
 1114 .Case("im2col.4d",
 1115 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
 1116 .Case("im2col.5d",
 1117 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
 1118 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
 1119 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
 1120 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
 1121 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
 1122 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
 1124
 1126 return ID;
 1127
 1128 // These intrinsics may need upgrade for two reasons:
 1129 // (1) When the address-space of the first argument is shared[AS=3]
 1130 // (and we upgrade it to use shared_cluster address-space[AS=7])
 1131 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
 1133 return ID;
 1134
 1135 // (2) When there are only two boolean flag arguments at the end:
 1136 //
 1137 // The last three parameters of the older version of these
 1138 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
 1139 //
 1140 // The newer version reads as:
 1141 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
 1142 //
 1143 // So, when the type of the [N-3]rd argument is "not i1", then
 1144 // it is the older version and we need to upgrade.
 1145 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
 1146 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
 1147 if (!ArgType->isIntegerTy(1))
 1148 return ID;
 1149 }
 1150
 1152}
1153
// Decide whether an NVVM shared-cluster related declaration needs upgrading,
// returning the current Intrinsic::ID when it does. 'mapa.shared.cluster' is
// upgraded based on the return type's pointer address-space; the two
// 'cp.async.bulk.*' cluster-copy forms based on the first argument's pointer
// address-space. (The function head and final return are not visible in this
// extract; presumably both address-space checks compare against the legacy
// shared address-space — TODO confirm.)
 1155 StringRef Name) {
 1156 if (Name.consume_front("mapa.shared.cluster"))
 1157 if (F->getReturnType()->getPointerAddressSpace() ==
 1159 return Intrinsic::nvvm_mapa_shared_cluster;
 1160
 1161 if (Name.consume_front("cp.async.bulk.")) {
 1164 .Case("global.to.shared.cluster",
 1165 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
 1166 .Case("shared.cta.to.cluster",
 1167 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
 1169
 1171 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
 1173 return ID;
 1174 }
 1175
 1177}
1178
// Map a legacy NVVM bf16/bf16x2 math intrinsic name (fma.rn.*, fmax.*,
// fmin.*, neg.*) to its current Intrinsic::ID. Used by the caller to detect
// declarations whose return type must be adjusted to bf16 (the call site
// below checks !isBFloatTy() before consulting this mapping).
 1180 if (Name.consume_front("fma.rn."))
 1181 return StringSwitch<Intrinsic::ID>(Name)
 1182 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
 1183 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
 1184 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
 1185 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
 1187
 1188 if (Name.consume_front("fmax."))
 1189 return StringSwitch<Intrinsic::ID>(Name)
 1190 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
 1191 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
 1192 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
 1193 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
 1194 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
 1195 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
 1196 .Case("ftz.nan.xorsign.abs.bf16",
 1197 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
 1198 .Case("ftz.nan.xorsign.abs.bf16x2",
 1199 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
 1200 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
 1201 .Case("ftz.xorsign.abs.bf16x2",
 1202 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
 1203 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
 1204 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
 1205 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
 1206 .Case("nan.xorsign.abs.bf16x2",
 1207 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
 1208 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
 1209 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
 1211
 1212 if (Name.consume_front("fmin."))
 1213 return StringSwitch<Intrinsic::ID>(Name)
 1214 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
 1215 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
 1216 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
 1217 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
 1218 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
 1219 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
 1220 .Case("ftz.nan.xorsign.abs.bf16",
 1221 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
 1222 .Case("ftz.nan.xorsign.abs.bf16x2",
 1223 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
 1224 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
 1225 .Case("ftz.xorsign.abs.bf16x2",
 1226 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
 1227 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
 1228 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
 1229 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
 1230 .Case("nan.xorsign.abs.bf16x2",
 1231 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
 1232 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
 1233 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
 1235
 1236 if (Name.consume_front("neg."))
 1237 return StringSwitch<Intrinsic::ID>(Name)
 1238 .Case("bf16", Intrinsic::nvvm_neg_bf16)
 1239 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
 1241
 1243}
1244
// Consume a leading NVVM address-space keyword ('local', 'shared', 'global',
// 'constant' or 'param') from Name, returning true when one was present.
// Used below to recognize 'nvvm.ptr.gen.to.*' / 'nvvm.ptr.*.to.gen' names.
 1246 return Name.consume_front("local") || Name.consume_front("shared") ||
 1247 Name.consume_front("global") || Name.consume_front("constant") ||
 1248 Name.consume_front("param");
 1249}
1250
// Check that a one-argument 'llvm.convert.to.fp16.*' / 'convert.from.fp16.*'
// declaration has types for which the replacement cast pair is legal:
// to.fp16 becomes FPTrunc-to-half then BitCast to the return type, and
// from.fp16 becomes BitCast-from-the-parameter then FPExt to the return
// type. Returns false for any other name.
 1252 const FunctionType *FuncTy) {
 1253 Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
 1254 if (Name.starts_with("to.fp16")) {
 1255 return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
 1256 HalfTy) &&
 1257 CastInst::castIsValid(Instruction::BitCast, HalfTy,
 1258 FuncTy->getReturnType());
 1259 }
 1260
 1261 if (Name.starts_with("from.fp16")) {
 1262 return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
 1263 HalfTy) &&
 1264 CastInst::castIsValid(Instruction::FPExt, HalfTy,
 1265 FuncTy->getReturnType());
 1266 }
 1267
 1268 return false;
 1269}
1270
// Core dispatcher that decides whether a declared 'llvm.*' function is a
// deprecated intrinsic needing upgrade. Switches on the first character
// after the 'llvm.' prefix and defers to per-target helpers. Returns true
// when F needs upgrading: NewFn is then either the replacement declaration
// (F is usually renamed with a '.old' suffix first via rename()) or nullptr
// when the call itself is rewritten later in UpgradeIntrinsicCall. Also
// handles two name-independent upgrades at the end: re-declaring intrinsics
// whose struct return type must become a literal non-packed struct, and
// remangling after mangling-scheme changes.
 1272 bool CanUpgradeDebugIntrinsicsToRecords) {
 1273 assert(F && "Illegal to upgrade a non-existent Function.");
 1274
 1275 StringRef Name = F->getName();
 1276
 1277 // Quickly eliminate it, if it's not a candidate.
 1278 if (!Name.consume_front("llvm.") || Name.empty())
 1279 return false;
 1280
 1281 switch (Name[0]) {
 1282 default: break;
 1283 case 'a': {
 1284 bool IsArm = Name.consume_front("arm.");
 1285 if (IsArm || Name.consume_front("aarch64.")) {
 1286 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
 1287 return true;
 1288 break;
 1289 }
 1290
 1291 if (Name.consume_front("amdgcn.")) {
 1292 if (Name == "alignbit") {
 1293 // Target specific intrinsic became redundant
 1295 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
 1296 return true;
 1297 }
 1298
 1299 if (Name.consume_front("atomic.")) {
 1300 if (Name.starts_with("inc") || Name.starts_with("dec") ||
 1301 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
 1302 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
 1303 // and usub_sat so there's no new declaration.
 1304 NewFn = nullptr;
 1305 return true;
 1306 }
 1307 break; // No other 'amdgcn.atomic.*'
 1308 }
 1309
 1310 switch (F->getIntrinsicID()) {
 1311 default:
 1312 break;
 1313 // Legacy wmma iu intrinsics without the optional clamp operand.
 1314 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
 1315 if (F->arg_size() == 7) {
 1316 NewFn = nullptr;
 1317 return true;
 1318 }
 1319 break;
 1320 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
 1321 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
 1322 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
 1323 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
 1324 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
 1325 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
 1326 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
 1327 if (F->arg_size() == 8) {
 1328 NewFn = nullptr;
 1329 return true;
 1330 }
 1331 break;
 1332 }
 1333
 1334 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
 1335 Name.consume_front("flat.atomic.")) {
 1336 if (Name.starts_with("fadd") ||
 1337 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
 1338 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
 1339 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
 1340 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
 1341 // declaration.
 1342 NewFn = nullptr;
 1343 return true;
 1344 }
 1345 }
 1346
 1347 if (Name.starts_with("ldexp.")) {
 1348 // Target specific intrinsic became redundant
 1350 F->getParent(), Intrinsic::ldexp,
 1351 {F->getReturnType(), F->getArg(1)->getType()});
 1352 return true;
 1353 }
 1354 break; // No other 'amdgcn.*'
 1355 }
 1356
 1357 break;
 1358 }
 1359 case 'c': {
 1360 if (F->arg_size() == 1) {
 1361 if (Name.consume_front("convert.")) {
// Only expand convert.to/from.fp16 when the replacement cast pair
// is actually legal for the declared types.
 1362 if (convertIntrinsicValidType(Name, F->getFunctionType())) {
 1363 NewFn = nullptr;
 1364 return true;
 1365 }
 1366 }
 1367
 1369 .StartsWith("ctlz.", Intrinsic::ctlz)
 1370 .StartsWith("cttz.", Intrinsic::cttz)
 1373 rename(F);
 1374 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
 1375 F->arg_begin()->getType());
 1376 return true;
 1377 }
 1378 }
 1379
 1380 if (F->arg_size() == 2 && Name == "coro.end") {
 1381 rename(F);
 1382 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
 1383 Intrinsic::coro_end);
 1384 return true;
 1385 }
 1386
 1387 break;
 1388 }
 1389 case 'd':
 1390 if (Name.consume_front("dbg.")) {
 1391 // Mark debug intrinsics for upgrade to new debug format.
 1392 if (CanUpgradeDebugIntrinsicsToRecords) {
 1393 if (Name == "addr" || Name == "value" || Name == "assign" ||
 1394 Name == "declare" || Name == "label") {
 1395 // There's no function to replace these with.
 1396 NewFn = nullptr;
 1397 // But we do want these to get upgraded.
 1398 return true;
 1399 }
 1400 }
 1401 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
 1402 // converted to DbgVariableRecords later.
 1403 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
 1404 rename(F);
 1405 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
 1406 Intrinsic::dbg_value);
 1407 return true;
 1408 }
 1409 break; // No other 'dbg.*'.
 1410 }
 1411 break;
 1412 case 'e':
 1413 if (Name.consume_front("experimental.vector.")) {
 1416 // Skip over extract.last.active, otherwise it will be 'upgraded'
 1417 // to a regular vector extract which is a different operation.
 1418 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
 1419 .StartsWith("extract.", Intrinsic::vector_extract)
 1420 .StartsWith("insert.", Intrinsic::vector_insert)
 1421 .StartsWith("reverse.", Intrinsic::vector_reverse)
 1422 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
 1423 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
 1424 .StartsWith("partial.reduce.add",
 1425 Intrinsic::vector_partial_reduce_add)
 1428 const auto *FT = F->getFunctionType();
// Build the overload type list expected by each replacement: which
// of return / param0 / param1 participate varies per intrinsic.
 1430 if (ID == Intrinsic::vector_extract ||
 1431 ID == Intrinsic::vector_interleave2)
 1432 // Extracting overloads the return type.
 1433 Tys.push_back(FT->getReturnType());
 1434 if (ID != Intrinsic::vector_interleave2)
 1435 Tys.push_back(FT->getParamType(0));
 1436 if (ID == Intrinsic::vector_insert ||
 1437 ID == Intrinsic::vector_partial_reduce_add)
 1438 // Inserting overloads the inserted type.
 1439 Tys.push_back(FT->getParamType(1));
 1440 rename(F);
 1441 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
 1442 return true;
 1443 }
 1444
 1445 if (Name.consume_front("reduce.")) {
 1447 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
 1448 if (R.match(Name, &Groups))
 1450 .Case("add", Intrinsic::vector_reduce_add)
 1451 .Case("mul", Intrinsic::vector_reduce_mul)
 1452 .Case("and", Intrinsic::vector_reduce_and)
 1453 .Case("or", Intrinsic::vector_reduce_or)
 1454 .Case("xor", Intrinsic::vector_reduce_xor)
 1455 .Case("smax", Intrinsic::vector_reduce_smax)
 1456 .Case("smin", Intrinsic::vector_reduce_smin)
 1457 .Case("umax", Intrinsic::vector_reduce_umax)
 1458 .Case("umin", Intrinsic::vector_reduce_umin)
 1459 .Case("fmax", Intrinsic::vector_reduce_fmax)
 1460 .Case("fmin", Intrinsic::vector_reduce_fmin)
 1462
// The 'v2.' prefix marks the fadd/fmul reductions whose first
// argument is the start value, so the vector overload is Args[1].
 1463 bool V2 = false;
 1465 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
 1466 Groups.clear();
 1467 V2 = true;
 1468 if (R2.match(Name, &Groups))
 1470 .Case("fadd", Intrinsic::vector_reduce_fadd)
 1471 .Case("fmul", Intrinsic::vector_reduce_fmul)
 1473 }
 1475 rename(F);
 1476 auto Args = F->getFunctionType()->params();
 1477 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
 1478 {Args[V2 ? 1 : 0]});
 1479 return true;
 1480 }
 1481 break; // No other 'expermental.vector.reduce.*'.
 1482 }
 1483
 1484 if (Name.consume_front("splice"))
 1485 return true;
 1486 break; // No other 'experimental.vector.*'.
 1487 }
 1488 if (Name.consume_front("experimental.stepvector.")) {
 1489 Intrinsic::ID ID = Intrinsic::stepvector;
 1490 rename(F);
 1492 F->getParent(), ID, F->getFunctionType()->getReturnType());
 1493 return true;
 1494 }
 1495 break; // No other 'e*'.
 1496 case 'f':
 1497 if (Name.starts_with("flt.rounds")) {
 1498 rename(F);
 1499 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
 1500 Intrinsic::get_rounding);
 1501 return true;
 1502 }
 1503 break;
 1504 case 'i':
 1505 if (Name.starts_with("invariant.group.barrier")) {
 1506 // Rename invariant.group.barrier to launder.invariant.group
 1507 auto Args = F->getFunctionType()->params();
 1508 Type* ObjectPtr[1] = {Args[0]};
 1509 rename(F);
 1511 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
 1512 return true;
 1513 }
 1514 break;
 1515 case 'l':
 1516 if ((Name.starts_with("lifetime.start") ||
 1517 Name.starts_with("lifetime.end")) &&
 1518 F->arg_size() == 2) {
 1519 Intrinsic::ID IID = Name.starts_with("lifetime.start")
 1520 ? Intrinsic::lifetime_start
 1521 : Intrinsic::lifetime_end;
 1522 rename(F);
 1523 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
 1524 F->getArg(0)->getType());
 1525 return true;
 1526 }
 1527 break;
 1528 case 'm': {
 1529 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
 1530 // alignment parameter to embedding the alignment as an attribute of
 1531 // the pointer args.
 1532 if (unsigned ID = StringSwitch<unsigned>(Name)
 1533 .StartsWith("memcpy.", Intrinsic::memcpy)
 1534 .StartsWith("memmove.", Intrinsic::memmove)
 1535 .Default(0)) {
 1536 if (F->arg_size() == 5) {
 1537 rename(F);
 1538 // Get the types of dest, src, and len
 1539 ArrayRef<Type *> ParamTypes =
 1540 F->getFunctionType()->params().slice(0, 3);
 1541 NewFn =
 1542 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
 1543 return true;
 1544 }
 1545 }
 1546 if (Name.starts_with("memset.") && F->arg_size() == 5) {
 1547 rename(F);
 1548 // Get the types of dest, and len
 1549 const auto *FT = F->getFunctionType();
 1550 Type *ParamTypes[2] = {
 1551 FT->getParamType(0), // Dest
 1552 FT->getParamType(2) // len
 1553 };
 1554 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
 1555 Intrinsic::memset, ParamTypes);
 1556 return true;
 1557 }
 1558
// 4-argument masked.* declarations are redeclared with explicit
// result/pointer overload types; the call rewrite (presumably dropping
// the old explicit alignment operand — confirm in UpgradeIntrinsicCall)
// happens later.
 1559 unsigned MaskedID =
 1561 .StartsWith("masked.load", Intrinsic::masked_load)
 1562 .StartsWith("masked.gather", Intrinsic::masked_gather)
 1563 .StartsWith("masked.store", Intrinsic::masked_store)
 1564 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
 1565 .Default(0);
 1566 if (MaskedID && F->arg_size() == 4) {
 1567 rename(F);
 1568 if (MaskedID == Intrinsic::masked_load ||
 1569 MaskedID == Intrinsic::masked_gather) {
 1571 F->getParent(), MaskedID,
 1572 {F->getReturnType(), F->getArg(0)->getType()});
 1573 return true;
 1574 }
 1576 F->getParent(), MaskedID,
 1577 {F->getArg(0)->getType(), F->getArg(1)->getType()});
 1578 return true;
 1579 }
 1580 break;
 1581 }
 1582 case 'n': {
 1583 if (Name.consume_front("nvvm.")) {
 1584 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
 1585 if (F->arg_size() == 1) {
 1586 Intrinsic::ID IID =
 1588 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
 1589 .Case("clz.i", Intrinsic::ctlz)
 1590 .Case("popc.i", Intrinsic::ctpop)
 1592 if (IID != Intrinsic::not_intrinsic) {
 1593 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
 1594 {F->getReturnType()});
 1595 return true;
 1596 }
 1597 } else if (F->arg_size() == 2) {
 1598 Intrinsic::ID IID =
 1600 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
 1601 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
 1602 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
 1603 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
 1605 if (IID != Intrinsic::not_intrinsic) {
 1606 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
 1607 {F->getReturnType()});
 1608 return true;
 1609 }
 1610 }
 1611
 1612 // Check for nvvm intrinsics that need a return type adjustment.
 1613 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
 1615 if (IID != Intrinsic::not_intrinsic) {
 1616 NewFn = nullptr;
 1617 return true;
 1618 }
 1619 }
 1620
 1621 // Upgrade Distributed Shared Memory Intrinsics
 1623 if (IID != Intrinsic::not_intrinsic) {
 1624 rename(F);
 1625 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
 1626 return true;
 1627 }
 1628
 1629 // Upgrade TMA copy G2S Intrinsics
 1631 if (IID != Intrinsic::not_intrinsic) {
 1632 rename(F);
 1633 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
 1634 return true;
 1635 }
 1636
 1637 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
 1638 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
 1639 //
 1640 // TODO: We could add lohi.i2d.
 1641 bool Expand = false;
 1642 if (Name.consume_front("abs."))
 1643 // nvvm.abs.{i,ii}
 1644 Expand =
 1645 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
 1646 else if (Name.consume_front("fabs."))
 1647 // nvvm.fabs.{f,ftz.f,d}
 1648 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
 1649 else if (Name.consume_front("ex2.approx."))
 1650 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
 1651 Expand =
 1652 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
 1653 else if (Name.consume_front("atomic.load."))
 1654 // nvvm.atomic.load.add.{f32,f64}.p
 1655 // nvvm.atomic.load.{inc,dec}.32.p
 1656 Expand = StringSwitch<bool>(Name)
 1657 .StartsWith("add.f32.p", true)
 1658 .StartsWith("add.f64.p", true)
 1659 .StartsWith("inc.32.p", true)
 1660 .StartsWith("dec.32.p", true)
 1661 .Default(false);
 1662 else if (Name.consume_front("bitcast."))
 1663 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
 1664 Expand =
 1665 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
 1666 else if (Name.consume_front("rotate."))
 1667 // nvvm.rotate.{b32,b64,right.b64}
 1668 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
 1669 else if (Name.consume_front("ptr.gen.to."))
 1670 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
 1671 Expand = consumeNVVMPtrAddrSpace(Name);
 1672 else if (Name.consume_front("ptr."))
 1673 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
 1674 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
 1675 else if (Name.consume_front("ldg.global."))
 1676 // nvvm.ldg.global.{i,p,f}
 1677 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
 1678 Name.starts_with("p."));
 1679 else
 1680 Expand = StringSwitch<bool>(Name)
 1681 .Case("barrier0", true)
 1682 .Case("barrier.n", true)
 1683 .Case("barrier.sync.cnt", true)
 1684 .Case("barrier.sync", true)
 1685 .Case("barrier", true)
 1686 .Case("bar.sync", true)
 1687 .Case("barrier0.popc", true)
 1688 .Case("barrier0.and", true)
 1689 .Case("barrier0.or", true)
 1690 .Case("clz.ll", true)
 1691 .Case("popc.ll", true)
 1692 .Case("h2f", true)
 1693 .Case("swap.lo.hi.b64", true)
 1694 .Case("tanh.approx.f32", true)
 1695 .Default(false);
 1696
 1697 if (Expand) {
 1698 NewFn = nullptr;
 1699 return true;
 1700 }
 1701 break; // No other 'nvvm.*'.
 1702 }
 1703 break;
 1704 }
 1705 case 'o':
 1706 if (Name.starts_with("objectsize.")) {
 1707 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
 1708 if (F->arg_size() == 2 || F->arg_size() == 3) {
 1709 rename(F);
 1710 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
 1711 Intrinsic::objectsize, Tys);
 1712 return true;
 1713 }
 1714 }
 1715 break;
 1716
 1717 case 'p':
 1718 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
 1719 rename(F);
 1721 F->getParent(), Intrinsic::ptr_annotation,
 1722 {F->arg_begin()->getType(), F->getArg(1)->getType()});
 1723 return true;
 1724 }
 1725 break;
 1726
 1727 case 'r': {
 1728 if (Name.consume_front("riscv.")) {
 1731 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
 1732 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
 1733 .Case("aes32esi", Intrinsic::riscv_aes32esi)
 1734 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
// The aes32* upgrade fires when the third parameter is not yet i32
// (the older declarations used a different width for it).
 1737 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
 1738 rename(F);
 1739 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
 1740 return true;
 1741 }
 1742 break; // No other applicable upgrades.
 1743 }
 1744
 1746 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
 1747 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
 1750 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
 1751 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
 1752 rename(F);
 1753 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
 1754 return true;
 1755 }
 1756 break; // No other applicable upgrades.
 1757 }
 1758
 1760 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
 1761 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
 1762 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
 1763 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
 1764 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
 1765 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
 1768 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
 1769 rename(F);
 1770 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
 1771 return true;
 1772 }
 1773 break; // No other applicable upgrades.
 1774 }
 1775
 1776 // Replace llvm.riscv.clmul with llvm.clmul.
 1777 if (Name == "clmul.i32" || Name == "clmul.i64") {
 1779 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
 1780 return true;
 1781 }
 1782
 1783 break; // No other 'riscv.*' intrinsics
 1784 }
 1785 } break;
 1786
 1787 case 's':
 1788 if (Name == "stackprotectorcheck") {
 1789 NewFn = nullptr;
 1790 return true;
 1791 }
 1792 break;
 1793
 1794 case 't':
 1795 if (Name == "thread.pointer") {
 1797 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
 1798 return true;
 1799 }
 1800 break;
 1801
 1802 case 'v': {
 1803 if (Name == "var.annotation" && F->arg_size() == 4) {
 1804 rename(F);
 1806 F->getParent(), Intrinsic::var_annotation,
 1807 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
 1808 return true;
 1809 }
 1810 if (Name.consume_front("vector.splice")) {
 1811 if (Name.starts_with(".left") || Name.starts_with(".right"))
 1812 break;
 1813 return true;
 1814 }
 1815 break;
 1816 }
 1817
 1818 case 'w':
 1819 if (Name.consume_front("wasm.")) {
 1822 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
 1823 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
 1824 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
 1827 rename(F);
 1828 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
 1829 F->getReturnType());
 1830 return true;
 1831 }
 1832
 1833 if (Name.consume_front("dot.i8x16.i7x16.")) {
 1835 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
 1836 .Case("add.signed",
 1837 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
 1840 rename(F);
 1841 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
 1842 return true;
 1843 }
 1844 break; // No other 'wasm.dot.i8x16.i7x16.*'.
 1845 }
 1846 break; // No other 'wasm.*'.
 1847 }
 1848 break;
 1849
 1850 case 'x':
 1851 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
 1852 return true;
 1853 }
 1854
// Name-independent upgrade: intrinsics declared with a named or packed
// struct return type are re-declared returning an equivalent literal
// non-packed struct, then remangled.
 1855 auto *ST = dyn_cast<StructType>(F->getReturnType());
 1856 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
 1857 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
 1858 // Replace return type with literal non-packed struct. Only do this for
 1859 // intrinsics declared to return a struct, not for intrinsics with
 1860 // overloaded return type, in which case the exact struct type will be
 1861 // mangled into the name.
 1864 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
 1865 auto *FT = F->getFunctionType();
 1866 auto *NewST = StructType::get(ST->getContext(), ST->elements());
 1867 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
 1868 std::string Name = F->getName().str();
 1869 rename(F);
 1870 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
 1871 Name, F->getParent());
 1872
 1873 // The new function may also need remangling.
 1874 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
 1875 NewFn = *Result;
 1876 return true;
 1877 }
 1878 }
 1879
 1880 // Remangle our intrinsic since we upgrade the mangling
 1882 if (Result != std::nullopt) {
 1883 NewFn = *Result;
 1884 return true;
 1885 }
 1886
 1887 // This may not belong here. This function is effectively being overloaded
 1888 // to both detect an intrinsic which needs upgrading, and to provide the
 1889 // upgraded form of the intrinsic. We should perhaps have two separate
 1890 // functions for this.
 1891 return false;
 1892}
1893
// Public entry point for intrinsic auto-upgrade. Delegates detection (and
// creation of a replacement declaration, if any) to
// upgradeIntrinsicFunction1(), then refreshes the intrinsic attribute list on
// whichever function is current — the replacement when one was produced,
// otherwise the original.
1895 bool CanUpgradeDebugIntrinsicsToRecords) {
1896 NewFn = nullptr;
1897 bool Upgraded =
1898 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1899
1900 // Upgrade intrinsic attributes. This does not change the function.
1901 if (NewFn)
1902 F = NewFn;
1903 if (Intrinsic::ID id = F->getIntrinsicID()) {
1904 // Only do this if the intrinsic signature is valid.
// getIntrinsicSignature() rejects declarations whose type does not match the
// intrinsic's expected (possibly overloaded) signature; skip attribute
// rewriting for those to avoid stamping attributes onto a bogus declaration.
1905 SmallVector<Type *> OverloadTys;
1906 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1907 F->setAttributes(
1908 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1909 }
// Return whether the function itself needed upgrading; attribute refresh
// alone does not count as an upgrade.
1910 return Upgraded;
1911}
1912
// Upgrade llvm.global_ctors / llvm.global_dtors whose element struct has only
// two fields ({priority, function}) to the current three-field form that adds
// an associated-data pointer as the third member. Returns the replacement
// global, or nullptr when no upgrade applies.
1914 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1915 GV->getName() == "llvm.global_dtors")) ||
1916 !GV->hasInitializer())
1917 return nullptr;
// NOTE(review): the declarations of ATy (the initializer's array type) and
// STy (its element struct type) are abbreviated in this listing — presumably
// dyn_cast results from GV's value type; confirm against upstream source.
1919 if (!ATy)
1920 return nullptr;
1922 if (!STy || STy->getNumElements() != 2)
1923 return nullptr;
1924
1925 LLVMContext &C = GV->getContext();
1926 IRBuilder<> IRB(C);
// New element type: the two original fields plus a trailing pointer field.
1927 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1928 IRB.getPtrTy());
1929 Constant *Init = GV->getInitializer();
1930 unsigned N = Init->getNumOperands();
1931 std::vector<Constant *> NewCtors(N);
// Rebuild each entry, carrying over the priority and function fields; the
// third operand of the ConstantStruct is abbreviated here (presumably a null
// pointer for the associated data).
1932 for (unsigned i = 0; i != N; ++i) {
1933 auto Ctor = cast<Constant>(Init->getOperand(i));
1934 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1935 Ctor->getAggregateElement(1),
1937 }
1938 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1939
// The replacement keeps the original linkage and name; the caller is
// responsible for splicing it into the module in place of GV.
1940 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1941 NewInit, GV->getName());
1942}
1943
1944// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1945// to byte shuffles.
// Lower a PSLLDQ-style whole-byte left shift to an equivalent shufflevector
// against a zero vector, working in 16-byte (128-bit) lanes.
1947 unsigned Shift) {
1948 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// The operand is a vector of 64-bit elements; NumElts counts bytes.
1949 unsigned NumElts = ResultTy->getNumElements() * 8;
1950
1951 // Bitcast from a 64-bit element type to a byte element type.
1952 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1953 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1954
1955 // We'll be shuffling in zeroes.
1956 Value *Res = Constant::getNullValue(VecTy);
1957
1958 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1959 // we'll just return the zero vector.
1960 if (Shift < 16) {
1961 int Idxs[64];
1962 // 256/512-bit version is split into 2/4 16-byte lanes.
1963 for (unsigned l = 0; l != NumElts; l += 16)
1964 for (unsigned i = 0; i != 16; ++i) {
// Indices >= NumElts select from the second shuffle operand (Op); smaller
// ones pull zero bytes from Res at the low end of the lane.
1965 unsigned Idx = NumElts + i - Shift;
1966 if (Idx < NumElts)
1967 Idx -= NumElts - 16; // end of lane, switch operand.
1968 Idxs[l + i] = Idx + l;
1969 }
1970
1971 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1972 }
1973
1974 // Bitcast back to a 64-bit element type.
1975 return Builder.CreateBitCast(Res, ResultTy, "cast");
1976}
1977
1978// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1979// to byte shuffles.
// Lower a PSRLDQ-style whole-byte right shift to an equivalent shufflevector
// against a zero vector, mirroring upgradeX86PSLLDQIntrinsics above but
// shifting toward element 0 within each 16-byte lane.
1981 unsigned Shift) {
1982 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// The operand is a vector of 64-bit elements; NumElts counts bytes.
1983 unsigned NumElts = ResultTy->getNumElements() * 8;
1984
1985 // Bitcast from a 64-bit element type to a byte element type.
1986 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1987 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1988
1989 // We'll be shuffling in zeroes.
1990 Value *Res = Constant::getNullValue(VecTy);
1991
1992 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1993 // we'll just return the zero vector.
1994 if (Shift < 16) {
1995 int Idxs[64];
1996 // 256/512-bit version is split into 2/4 16-byte lanes.
1997 for (unsigned l = 0; l != NumElts; l += 16)
1998 for (unsigned i = 0; i != 16; ++i) {
// Indices past the lane boundary spill into the second shuffle operand
// (the zero vector Res), shifting zeros in at the high end of the lane.
1999 unsigned Idx = i + Shift;
2000 if (Idx >= 16)
2001 Idx += NumElts - 16; // end of lane, switch operand.
2002 Idxs[l + i] = Idx + l;
2003 }
2004
2005 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
2006 }
2007
2008 // Bitcast back to a 64-bit element type.
2009 return Builder.CreateBitCast(Res, ResultTy, "cast");
2010}
2011
// Convert an x86 integer mask argument (i8/i16/i32/i64) into a <NumElts x i1>
// vector suitable for a select or masked memory intrinsic.
2012 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2013 unsigned NumElts) {
2014 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
// Reinterpret the scalar mask as a vector of i1, one bit per element.
// NOTE(review): the construction of MaskTy is abbreviated in this listing —
// presumably FixedVectorType::get over the mask's bit width; confirm upstream.
2016 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
2017 Mask = Builder.CreateBitCast(Mask, MaskTy);
2018
2019 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2020 // i8 and we need to extract down to the right number of elements.
2021 if (NumElts <= 4) {
2022 int Indices[4];
2023 for (unsigned i = 0; i != NumElts; ++i)
2024 Indices[i] = i;
2025 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2026 "extract");
2027 }
2028
2029 return Mask;
2030}
2031
2032static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2033 Value *Op1) {
2034 // If the mask is all ones just emit the first operation.
2035 if (const auto *C = dyn_cast<Constant>(Mask))
2036 if (C->isAllOnesValue())
2037 return Op0;
2038
2039 Mask = getX86MaskVec(Builder, Mask,
2040 cast<FixedVectorType>(Op0->getType())->getNumElements());
2041 return Builder.CreateSelect(Mask, Op0, Op1);
2042}
2043
2044static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2045 Value *Op1) {
2046 // If the mask is all ones just emit the first operation.
2047 if (const auto *C = dyn_cast<Constant>(Mask))
2048 if (C->isAllOnesValue())
2049 return Op0;
2050
2051 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2052 Mask->getType()->getIntegerBitWidth());
2053 Mask = Builder.CreateBitCast(Mask, MaskTy);
2054 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2055 return Builder.CreateSelect(Mask, Op0, Op1);
2056}
2057
2058// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2059// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2060// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// Lower masked PALIGNR and VALIGND/Q to a shufflevector plus mask-select.
// PALIGNR works per 128-bit lane and treats large immediates by shifting in
// zeros; VALIGN masks the immediate and has no lane structure.
2062 Value *Op1, Value *Shift,
2063 Value *Passthru, Value *Mask,
2064 bool IsVALIGN) {
2065 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2066
2067 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2068 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2069 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2070 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2071
2072 // Mask the immediate for VALIGN.
2073 if (IsVALIGN)
2074 ShiftVal &= (NumElts - 1);
2075
2076 // If palignr is shifting the pair of vectors more than the size of two
2077 // lanes, emit zero.
// NOTE(review): the returned constant for the >= 32 case is abbreviated in
// this listing — presumably a zero vector of the result type; confirm.
2078 if (ShiftVal >= 32)
2080
2081 // If palignr is shifting the pair of input vectors more than one lane,
2082 // but less than two lanes, convert to shifting in zeroes.
// NOTE(review): the replacement of the second source with zeros (line 2086)
// is abbreviated in this listing.
2083 if (ShiftVal > 16) {
2084 ShiftVal -= 16;
2085 Op1 = Op0;
2087 }
2088
2089 int Indices[64];
2090 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2091 for (unsigned l = 0; l < NumElts; l += 16) {
2092 for (unsigned i = 0; i != 16; ++i) {
2093 unsigned Idx = ShiftVal + i;
2094 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2095 Idx += NumElts - 16; // End of lane, switch operand.
2096 Indices[l + i] = Idx + l;
2097 }
2098 }
2099
// Note operand order: Op1 supplies the low indices, Op0 the high ones,
// matching the hardware's {Op0:Op1} concatenated shift-right semantics.
2100 Value *Align = Builder.CreateShuffleVector(
2101 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2102
2103 return emitX86Select(Builder, Mask, Align, Passthru);
2104}
2105
// Upgrade masked VPERMT2/VPERMI2 two-source permute intrinsics: pick the
// unmasked x86_avx512_vpermi2var_* intrinsic by vector width / element width /
// element kind, emit it, and apply the mask with a select. IndexForm
// distinguishes vpermi2 (indices in operand 0) from vpermt2 (indices in
// operand 1); ZeroMask selects zeroing- vs merging-masked semantics.
2107 bool ZeroMask, bool IndexForm) {
2108 Type *Ty = CI.getType();
2109 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2110 unsigned EltWidth = Ty->getScalarSizeInBits();
2111 bool IsFloat = Ty->isFPOrFPVectorTy();
2112 Intrinsic::ID IID;
2113 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2114 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2115 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2116 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2117 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2118 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2119 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2120 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2121 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2122 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2123 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2124 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2125 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2126 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2127 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2128 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2129 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2130 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2131 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2132 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2133 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2134 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2135 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2136 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2137 else if (VecWidth == 128 && EltWidth == 16)
2138 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2139 else if (VecWidth == 256 && EltWidth == 16)
2140 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2141 else if (VecWidth == 512 && EltWidth == 16)
2142 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2143 else if (VecWidth == 128 && EltWidth == 8)
2144 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2145 else if (VecWidth == 256 && EltWidth == 8)
2146 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2147 else if (VecWidth == 512 && EltWidth == 8)
2148 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2149 else
2150 llvm_unreachable("Unexpected intrinsic");
2151
2152 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2153 CI.getArgOperand(2) };
2154
2155 // If this isn't index form we need to swap operand 0 and 1.
2156 if (!IndexForm)
2157 std::swap(Args[0], Args[1]);
2158
2159 Value *V = Builder.CreateIntrinsic(IID, Args);
// Zeroing-masked forms blend with zero; merging-masked forms blend with the
// index operand (bitcast to the result type), matching vpermt2 semantics.
2160 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2161 : Builder.CreateBitCast(CI.getArgOperand(1),
2162 Ty);
2163 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2164}
2165
// Replace a legacy (optionally masked) two-operand x86 intrinsic with the
// generic intrinsic IID, applying mask/passthru when the call carries them.
2167 Intrinsic::ID IID) {
2168 Type *Ty = CI.getType();
2169 Value *Op0 = CI.getOperand(0);
2170 Value *Op1 = CI.getOperand(1);
2171 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2172
// Four-argument forms are the AVX512 masked variants: arg 2 is the passthru
// vector, arg 3 the integer mask.
2173 if (CI.arg_size() == 4) { // For masked intrinsics.
2174 Value *VecSrc = CI.getOperand(2);
2175 Value *Mask = CI.getOperand(3);
2176 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2177 }
2178 return Res;
2179}
2180
// Upgrade x86 vector rotate intrinsics to the generic funnel-shift intrinsic
// (llvm.fshl / llvm.fshr with both data operands equal, i.e. a rotate).
2182 bool IsRotateRight) {
2183 Type *Ty = CI.getType();
2184 Value *Src = CI.getArgOperand(0);
2185 Value *Amt = CI.getArgOperand(1);
2186
2187 // Amount may be scalar immediate, in which case create a splat vector.
2188 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2189 // we only care about the lowest log2 bits anyway.
2190 if (Amt->getType() != Ty) {
2191 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2192 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2193 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2194 }
2195
// fshl(Src, Src, Amt) == rotate-left; fshr == rotate-right.
2196 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2197 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2198
// Four-argument forms are the AVX512 masked variants: arg 2 is the passthru
// vector, arg 3 the integer mask.
2199 if (CI.arg_size() == 4) { // For masked intrinsics.
2200 Value *VecSrc = CI.getOperand(2);
2201 Value *Mask = CI.getOperand(3);
2202 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2203 }
2204 return Res;
2205}
2206
2207static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2208 bool IsSigned) {
2209 Type *Ty = CI.getType();
2210 Value *LHS = CI.getArgOperand(0);
2211 Value *RHS = CI.getArgOperand(1);
2212
2213 CmpInst::Predicate Pred;
2214 switch (Imm) {
2215 case 0x0:
2216 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2217 break;
2218 case 0x1:
2219 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2220 break;
2221 case 0x2:
2222 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2223 break;
2224 case 0x3:
2225 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2226 break;
2227 case 0x4:
2228 Pred = ICmpInst::ICMP_EQ;
2229 break;
2230 case 0x5:
2231 Pred = ICmpInst::ICMP_NE;
2232 break;
2233 case 0x6:
2234 return Constant::getNullValue(Ty); // FALSE
2235 case 0x7:
2236 return Constant::getAllOnesValue(Ty); // TRUE
2237 default:
2238 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2239 }
2240
2241 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2242 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2243 return Ext;
2244}
2245
// Upgrade x86 VSHLD/VSHRD concat-shift intrinsics to the generic funnel-shift
// intrinsic, handling immediate or vector shift amounts and the masked forms.
2247 bool IsShiftRight, bool ZeroMask) {
2248 Type *Ty = CI.getType();
2249 Value *Op0 = CI.getArgOperand(0);
2250 Value *Op1 = CI.getArgOperand(1);
2251 Value *Amt = CI.getArgOperand(2);
2252
// llvm.fshr's operand order is the reverse of llvm.fshl's, so swapping the
// data operands lets one code path serve both directions.
2253 if (IsShiftRight)
2254 std::swap(Op0, Op1);
2255
2256 // Amount may be scalar immediate, in which case create a splat vector.
2257 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2258 // we only care about the lowest log2 bits anyway.
2259 if (Amt->getType() != Ty) {
2260 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2261 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2262 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2263 }
2264
2265 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2266 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2267
// 5-argument calls carry an explicit passthru (arg 3); 4-argument calls use
// either zero (zeroing mask) or the first source (merging mask) as passthru.
// The mask is always the last argument.
2268 unsigned NumArgs = CI.arg_size();
2269 if (NumArgs >= 4) { // For masked intrinsics.
2270 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2271 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2272 CI.getArgOperand(0);
2273 Value *Mask = CI.getOperand(NumArgs - 1);
2274 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2275 }
2276 return Res;
2277}
2278
// Upgrade x86 masked-store intrinsics to llvm.masked.store (or a plain store
// when the mask is a constant all-ones). Aligned selects natural vector
// alignment vs. unaligned (align 1).
2280 Value *Mask, bool Aligned) {
2281 const Align Alignment =
2282 Aligned
2283 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2284 : Align(1);
2285
2286 // If the mask is all ones just emit a regular store.
2287 if (const auto *C = dyn_cast<Constant>(Mask))
2288 if (C->isAllOnesValue())
2289 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2290
2291 // Convert the mask from an integer type to a vector of i1.
2292 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2293 Mask = getX86MaskVec(Builder, Mask, NumElts);
2294 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2295}
2296
// Upgrade x86 masked-load intrinsics to llvm.masked.load (or a plain load
// when the mask is a constant all-ones). Passthru supplies the value for
// disabled lanes; Aligned selects natural vs. byte alignment.
2298 Value *Passthru, Value *Mask, bool Aligned) {
2299 Type *ValTy = Passthru->getType();
// NOTE(review): the aligned-case expression is abbreviated in this listing —
// presumably ValTy's primitive size in bits divided by 8, mirroring
// upgradeMaskedStore above; confirm upstream.
2300 const Align Alignment =
2301 Aligned
2302 ? Align(
2304 8)
2305 : Align(1);
2306
2307 // If the mask is all ones just emit a regular store.
2308 if (const auto *C = dyn_cast<Constant>(Mask))
2309 if (C->isAllOnesValue())
2310 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2311
2312 // Convert the mask from an integer type to a vector of i1.
2313 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2314 Mask = getX86MaskVec(Builder, Mask, NumElts);
2315 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2316}
2317
2318static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2319 Type *Ty = CI.getType();
2320 Value *Op0 = CI.getArgOperand(0);
2321 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2322 {Op0, Builder.getInt1(false)});
2323 if (CI.arg_size() == 3)
2324 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2325 return Res;
2326}
2327
2328static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2329 Type *Ty = CI.getType();
2330
2331 // Arguments have a vXi32 type so cast to vXi64.
2332 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2333 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2334
2335 if (IsSigned) {
2336 // Shift left then arithmetic shift right.
2337 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2338 LHS = Builder.CreateShl(LHS, ShiftAmt);
2339 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2340 RHS = Builder.CreateShl(RHS, ShiftAmt);
2341 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2342 } else {
2343 // Clear the upper bits.
2344 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2345 LHS = Builder.CreateAnd(LHS, Mask);
2346 RHS = Builder.CreateAnd(RHS, Mask);
2347 }
2348
2349 Value *Res = Builder.CreateMul(LHS, RHS);
2350
2351 if (CI.arg_size() == 4)
2352 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2353
2354 return Res;
2355}
2356
2357// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// AND an i1-vector comparison result with the (converted) integer mask, then
// widen to at least 8 lanes and bitcast back to an integer mask, since the
// legacy intrinsics return i8/i16/... rather than vectors of i1.
2359 Value *Mask) {
2360 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2361 if (Mask) {
// Skip the AND entirely for a constant all-ones mask.
2362 const auto *C = dyn_cast<Constant>(Mask);
2363 if (!C || !C->isAllOnesValue())
2364 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2365 }
2366
// Fewer than 8 lanes: pad up to 8 so the result bitcasts to an i8.
// NOTE(review): the second shuffle operand is abbreviated in this listing —
// presumably a zero vector supplying the padding lanes; confirm upstream.
2367 if (NumElts < 8) {
2368 int Indices[8];
2369 for (unsigned i = 0; i != NumElts; ++i)
2370 Indices[i] = i;
2371 for (unsigned i = NumElts; i != 8; ++i)
2372 Indices[i] = NumElts + i % NumElts;
2373 Vec = Builder.CreateShuffleVector(Vec,
2375 Indices);
2376 }
2377 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2378}
2379
// Upgrade AVX512 masked integer compare intrinsics: build the icmp selected
// by the 3-bit condition code CC, then AND with the mask and widen via
// applyX86MaskOn1BitsVec to produce the integer mask result.
2381 unsigned CC, bool Signed) {
2382 Value *Op0 = CI.getArgOperand(0);
2383 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2384
// NOTE(review): the assignments for CC == 3 and CC == 7 are abbreviated in
// this listing — they appear to set Cmp to a constant all-false / all-true
// <N x i1> vector respectively; confirm upstream.
2385 Value *Cmp;
2386 if (CC == 3) {
2388 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2389 } else if (CC == 7) {
2391 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2392 } else {
2394 switch (CC) {
2395 default: llvm_unreachable("Unknown condition code");
2396 case 0: Pred = ICmpInst::ICMP_EQ; break;
2397 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2398 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2399 case 4: Pred = ICmpInst::ICMP_NE; break;
2400 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2401 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2402 }
2403 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2404 }
2405
// The mask is always the call's final argument.
2406 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2407
2408 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2409}
2410
2411// Replace a masked intrinsic with an older unmasked intrinsic.
// Replace a masked intrinsic with the older unmasked intrinsic IID applied to
// the two source operands, followed by a select on the mask (arg 3) against
// the passthru (arg 2).
2413 Intrinsic::ID IID) {
2414 Value *Rep =
2415 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2416 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2417}
2418
// Upgrade masked scalar move intrinsics: if bit 0 of the i8 mask is set, take
// element 0 of B, otherwise element 0 of Src, and insert the chosen scalar
// into element 0 of A.
2420 Value* A = CI.getArgOperand(0);
2421 Value* B = CI.getArgOperand(1);
2422 Value* Src = CI.getArgOperand(2);
2423 Value* Mask = CI.getArgOperand(3);
2424
// Only the lowest mask bit participates.
2425 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2426 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2427 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2428 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2429 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2430 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2431}
2432
// Upgrade vpmovm2* intrinsics: expand an integer mask to a full vector by
// converting it to <N x i1> and sign-extending, so set bits become all-ones
// lanes and clear bits become zero lanes.
2434 Value* Op = CI.getArgOperand(0);
2435 Type* ReturnOp = CI.getType();
2436 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2437 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2438 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2440
2441// Replace intrinsic with unmasked version and a select.
// Upgrade an "avx512.mask.*" intrinsic call to the equivalent unmasked
// intrinsic followed by a mask-driven select. The "avx512.mask." prefix is
// stripped from Name below; the unmasked intrinsic ID is chosen from the
// operation name plus the call's vector width and element width. On success
// Rep receives the replacement value and true is returned; unhandled names
// return false.
2443 CallBase &CI, Value *&Rep) {
2444 Name = Name.substr(12); // Remove avx512.mask.
2445
2446 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2447 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2448 Intrinsic::ID IID;
2449 if (Name.starts_with("max.p")) {
2450 if (VecWidth == 128 && EltWidth == 32)
2451 IID = Intrinsic::x86_sse_max_ps;
2452 else if (VecWidth == 128 && EltWidth == 64)
2453 IID = Intrinsic::x86_sse2_max_pd;
2454 else if (VecWidth == 256 && EltWidth == 32)
2455 IID = Intrinsic::x86_avx_max_ps_256;
2456 else if (VecWidth == 256 && EltWidth == 64)
2457 IID = Intrinsic::x86_avx_max_pd_256;
2458 else
2459 llvm_unreachable("Unexpected intrinsic");
2460 } else if (Name.starts_with("min.p")) {
2461 if (VecWidth == 128 && EltWidth == 32)
2462 IID = Intrinsic::x86_sse_min_ps;
2463 else if (VecWidth == 128 && EltWidth == 64)
2464 IID = Intrinsic::x86_sse2_min_pd;
2465 else if (VecWidth == 256 && EltWidth == 32)
2466 IID = Intrinsic::x86_avx_min_ps_256;
2467 else if (VecWidth == 256 && EltWidth == 64)
2468 IID = Intrinsic::x86_avx_min_pd_256;
2469 else
2470 llvm_unreachable("Unexpected intrinsic");
2471 } else if (Name.starts_with("pshuf.b.")) {
2472 if (VecWidth == 128)
2473 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2474 else if (VecWidth == 256)
2475 IID = Intrinsic::x86_avx2_pshuf_b;
2476 else if (VecWidth == 512)
2477 IID = Intrinsic::x86_avx512_pshuf_b_512;
2478 else
2479 llvm_unreachable("Unexpected intrinsic");
2480 } else if (Name.starts_with("pmul.hr.sw.")) {
2481 if (VecWidth == 128)
2482 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2483 else if (VecWidth == 256)
2484 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2485 else if (VecWidth == 512)
2486 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2487 else
2488 llvm_unreachable("Unexpected intrinsic");
2489 } else if (Name.starts_with("pmulh.w.")) {
2490 if (VecWidth == 128)
2491 IID = Intrinsic::x86_sse2_pmulh_w;
2492 else if (VecWidth == 256)
2493 IID = Intrinsic::x86_avx2_pmulh_w;
2494 else if (VecWidth == 512)
2495 IID = Intrinsic::x86_avx512_pmulh_w_512;
2496 else
2497 llvm_unreachable("Unexpected intrinsic");
2498 } else if (Name.starts_with("pmulhu.w.")) {
2499 if (VecWidth == 128)
2500 IID = Intrinsic::x86_sse2_pmulhu_w;
2501 else if (VecWidth == 256)
2502 IID = Intrinsic::x86_avx2_pmulhu_w;
2503 else if (VecWidth == 512)
2504 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2505 else
2506 llvm_unreachable("Unexpected intrinsic");
2507 } else if (Name.starts_with("pmaddw.d.")) {
2508 if (VecWidth == 128)
2509 IID = Intrinsic::x86_sse2_pmadd_wd;
2510 else if (VecWidth == 256)
2511 IID = Intrinsic::x86_avx2_pmadd_wd;
2512 else if (VecWidth == 512)
2513 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2514 else
2515 llvm_unreachable("Unexpected intrinsic");
2516 } else if (Name.starts_with("pmaddubs.w.")) {
2517 if (VecWidth == 128)
2518 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2519 else if (VecWidth == 256)
2520 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2521 else if (VecWidth == 512)
2522 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2523 else
2524 llvm_unreachable("Unexpected intrinsic");
2525 } else if (Name.starts_with("packsswb.")) {
2526 if (VecWidth == 128)
2527 IID = Intrinsic::x86_sse2_packsswb_128;
2528 else if (VecWidth == 256)
2529 IID = Intrinsic::x86_avx2_packsswb;
2530 else if (VecWidth == 512)
2531 IID = Intrinsic::x86_avx512_packsswb_512;
2532 else
2533 llvm_unreachable("Unexpected intrinsic");
2534 } else if (Name.starts_with("packssdw.")) {
2535 if (VecWidth == 128)
2536 IID = Intrinsic::x86_sse2_packssdw_128;
2537 else if (VecWidth == 256)
2538 IID = Intrinsic::x86_avx2_packssdw;
2539 else if (VecWidth == 512)
2540 IID = Intrinsic::x86_avx512_packssdw_512;
2541 else
2542 llvm_unreachable("Unexpected intrinsic");
2543 } else if (Name.starts_with("packuswb.")) {
2544 if (VecWidth == 128)
2545 IID = Intrinsic::x86_sse2_packuswb_128;
2546 else if (VecWidth == 256)
2547 IID = Intrinsic::x86_avx2_packuswb;
2548 else if (VecWidth == 512)
2549 IID = Intrinsic::x86_avx512_packuswb_512;
2550 else
2551 llvm_unreachable("Unexpected intrinsic");
2552 } else if (Name.starts_with("packusdw.")) {
2553 if (VecWidth == 128)
2554 IID = Intrinsic::x86_sse41_packusdw;
2555 else if (VecWidth == 256)
2556 IID = Intrinsic::x86_avx2_packusdw;
2557 else if (VecWidth == 512)
2558 IID = Intrinsic::x86_avx512_packusdw_512;
2559 else
2560 llvm_unreachable("Unexpected intrinsic");
2561 } else if (Name.starts_with("vpermilvar.")) {
2562 if (VecWidth == 128 && EltWidth == 32)
2563 IID = Intrinsic::x86_avx_vpermilvar_ps;
2564 else if (VecWidth == 128 && EltWidth == 64)
2565 IID = Intrinsic::x86_avx_vpermilvar_pd;
2566 else if (VecWidth == 256 && EltWidth == 32)
2567 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2568 else if (VecWidth == 256 && EltWidth == 64)
2569 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2570 else if (VecWidth == 512 && EltWidth == 32)
2571 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2572 else if (VecWidth == 512 && EltWidth == 64)
2573 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2574 else
2575 llvm_unreachable("Unexpected intrinsic");
// Conversion intrinsics are matched by exact name — the suffix already pins
// the width, so no width dispatch is needed.
2576 } else if (Name == "cvtpd2dq.256") {
2577 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2578 } else if (Name == "cvtpd2ps.256") {
2579 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2580 } else if (Name == "cvttpd2dq.256") {
2581 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2582 } else if (Name == "cvttps2dq.128") {
2583 IID = Intrinsic::x86_sse2_cvttps2dq;
2584 } else if (Name == "cvttps2dq.256") {
2585 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2586 } else if (Name.starts_with("permvar.")) {
2587 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2588 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2589 IID = Intrinsic::x86_avx2_permps;
2590 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2591 IID = Intrinsic::x86_avx2_permd;
2592 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2593 IID = Intrinsic::x86_avx512_permvar_df_256;
2594 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2595 IID = Intrinsic::x86_avx512_permvar_di_256;
2596 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2597 IID = Intrinsic::x86_avx512_permvar_sf_512;
2598 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2599 IID = Intrinsic::x86_avx512_permvar_si_512;
2600 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2601 IID = Intrinsic::x86_avx512_permvar_df_512;
2602 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2603 IID = Intrinsic::x86_avx512_permvar_di_512;
2604 else if (VecWidth == 128 && EltWidth == 16)
2605 IID = Intrinsic::x86_avx512_permvar_hi_128;
2606 else if (VecWidth == 256 && EltWidth == 16)
2607 IID = Intrinsic::x86_avx512_permvar_hi_256;
2608 else if (VecWidth == 512 && EltWidth == 16)
2609 IID = Intrinsic::x86_avx512_permvar_hi_512;
2610 else if (VecWidth == 128 && EltWidth == 8)
2611 IID = Intrinsic::x86_avx512_permvar_qi_128;
2612 else if (VecWidth == 256 && EltWidth == 8)
2613 IID = Intrinsic::x86_avx512_permvar_qi_256;
2614 else if (VecWidth == 512 && EltWidth == 8)
2615 IID = Intrinsic::x86_avx512_permvar_qi_512;
2616 else
2617 llvm_unreachable("Unexpected intrinsic");
2618 } else if (Name.starts_with("dbpsadbw.")) {
2619 if (VecWidth == 128)
2620 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2621 else if (VecWidth == 256)
2622 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2623 else if (VecWidth == 512)
2624 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2625 else
2626 llvm_unreachable("Unexpected intrinsic");
2627 } else if (Name.starts_with("pmultishift.qb.")) {
2628 if (VecWidth == 128)
2629 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2630 else if (VecWidth == 256)
2631 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2632 else if (VecWidth == 512)
2633 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2634 else
2635 llvm_unreachable("Unexpected intrinsic");
// conflict.* and pavg.* additionally dispatch on the element-kind letter that
// follows the prefix ('d'/'q' for conflict, 'b'/'w' for pavg).
2636 } else if (Name.starts_with("conflict.")) {
2637 if (Name[9] == 'd' && VecWidth == 128)
2638 IID = Intrinsic::x86_avx512_conflict_d_128;
2639 else if (Name[9] == 'd' && VecWidth == 256)
2640 IID = Intrinsic::x86_avx512_conflict_d_256;
2641 else if (Name[9] == 'd' && VecWidth == 512)
2642 IID = Intrinsic::x86_avx512_conflict_d_512;
2643 else if (Name[9] == 'q' && VecWidth == 128)
2644 IID = Intrinsic::x86_avx512_conflict_q_128;
2645 else if (Name[9] == 'q' && VecWidth == 256)
2646 IID = Intrinsic::x86_avx512_conflict_q_256;
2647 else if (Name[9] == 'q' && VecWidth == 512)
2648 IID = Intrinsic::x86_avx512_conflict_q_512;
2649 else
2650 llvm_unreachable("Unexpected intrinsic");
2651 } else if (Name.starts_with("pavg.")) {
2652 if (Name[5] == 'b' && VecWidth == 128)
2653 IID = Intrinsic::x86_sse2_pavg_b;
2654 else if (Name[5] == 'b' && VecWidth == 256)
2655 IID = Intrinsic::x86_avx2_pavg_b;
2656 else if (Name[5] == 'b' && VecWidth == 512)
2657 IID = Intrinsic::x86_avx512_pavg_b_512;
2658 else if (Name[5] == 'w' && VecWidth == 128)
2659 IID = Intrinsic::x86_sse2_pavg_w;
2660 else if (Name[5] == 'w' && VecWidth == 256)
2661 IID = Intrinsic::x86_avx2_pavg_w;
2662 else if (Name[5] == 'w' && VecWidth == 512)
2663 IID = Intrinsic::x86_avx512_pavg_w_512;
2664 else
2665 llvm_unreachable("Unexpected intrinsic");
2666 } else
2667 return false;
2668
// Drop the trailing passthru and mask operands — the unmasked intrinsic takes
// only the compute operands — then reapply the masking as a select.
2669 SmallVector<Value *, 4> Args(CI.args());
2670 Args.pop_back();
2671 Args.pop_back();
2672 Rep = Builder.CreateIntrinsic(IID, Args);
2673 unsigned NumArgs = CI.arg_size();
2674 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2675 CI.getArgOperand(NumArgs - 2));
2676 return true;
2677}
2678
2679/// Upgrade comment in call to inline asm that represents an objc retain release
2680/// marker.
/// Upgrade comment in call to inline asm that represents an objc retain
/// release marker.
namespace llvm {
void UpgradeInlineAsmString(std::string *AsmStr) {
  // Old ObjC ARC marker asm spelled the marker as "# marker"; rewrite the
  // '#' to ';' so it is still treated as an asm comment. Only strings that
  // start with "mov\tfp" and mention the autorelease marker are touched.
  if (AsmStr->find("mov\tfp") != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;
  size_t MarkerPos = AsmStr->find("# marker");
  if (MarkerPos != std::string::npos)
    AsmStr->replace(MarkerPos, 1, ";");
}
} // namespace llvm
2689
2691 Function *F, IRBuilder<> &Builder) {
  // Rewrites a legacy NVVM intrinsic call into equivalent generic IR or a
  // current intrinsic. Name appears to be the intrinsic name with its
  // "llvm.nvvm." prefix already stripped (it is matched against suffixes
  // like "abs.i") -- TODO confirm at the caller. Returns the replacement
  // value, or nullptr when Name is not handled here.
2692   Value *Rep = nullptr;
2693
  // Integer abs: emit select(x >= 0, x, -x) directly.
2694   if (Name == "abs.i" || Name == "abs.ll") {
2695     Value *Arg = CI->getArgOperand(0);
2696     Value *Neg = Builder.CreateNeg(Arg, "neg");
2697     Value *Cmp = Builder.CreateICmpSGE(
2698         Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2699     Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  // bf16 abs: the legacy form used an integer-typed payload, so bitcast to
  // bfloat (or <2 x bfloat>), take fabs, and bitcast back.
2700   } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2701     Type *Ty = (Name == "abs.bf16")
2702                    ? Builder.getBFloatTy()
2703                    : FixedVectorType::get(Builder.getBFloatTy(), 2);
2704     Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2705     Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2706     Rep = Builder.CreateBitCast(Abs, CI->getType());
2707   } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2708     Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2709                                                : Intrinsic::nvvm_fabs;
2710     Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2711   } else if (Name.consume_front("ex2.approx.")) {
2712     // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2713     Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2714                                                 : Intrinsic::nvvm_ex2_approx;
2715     Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  // Float atomic add lowers to a native atomicrmw fadd.
2716   } else if (Name.starts_with("atomic.load.add.f32.p") ||
2717              Name.starts_with("atomic.load.add.f64.p")) {
2718     Value *Ptr = CI->getArgOperand(0);
2719     Value *Val = CI->getArgOperand(1);
2720     Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2722   } else if (Name.starts_with("atomic.load.inc.32.p") ||
2723              Name.starts_with("atomic.load.dec.32.p")) {
2724     Value *Ptr = CI->getArgOperand(0);
2725     Value *Val = CI->getArgOperand(1);
2726     auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2728     Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2730   } else if (Name == "clz.ll") {
2731     // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2732     Value *Arg = CI->getArgOperand(0);
2733     Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2734                                           {Arg, Builder.getFalse()},
2735                                           /*FMFSource=*/nullptr, "ctlz");
2736     Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2737   } else if (Name == "popc.ll") {
2738     // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2739     // i64.
2740     Value *Arg = CI->getArgOperand(0);
2741     Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2742                                           Arg, /*FMFSource=*/nullptr, "ctpop");
2743     Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  // half -> float conversion: reinterpret the i16 payload as half, then fpext.
2744   } else if (Name == "h2f") {
2745     Value *Cast =
2746         Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2747     Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2748   } else if (Name.consume_front("bitcast.") &&
2749              (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2750               Name == "d2ll")) {
2751     Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
  // Rotates become funnel shifts with both vector operands equal to the
  // rotated value.
2752   } else if (Name == "rotate.b32") {
2753     Value *Arg = CI->getOperand(0);
2754     Value *ShiftAmt = CI->getOperand(1);
2755     Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2756                                   {Arg, Arg, ShiftAmt});
2757   } else if (Name == "rotate.b64") {
2758     Type *Int64Ty = Builder.getInt64Ty();
2759     Value *Arg = CI->getOperand(0);
2760     Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2761     Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2762                                   {Arg, Arg, ZExtShiftAmt});
2763   } else if (Name == "rotate.right.b64") {
2764     Type *Int64Ty = Builder.getInt64Ty();
2765     Value *Arg = CI->getOperand(0);
2766     Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2767     Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2768                                   {Arg, Arg, ZExtShiftAmt});
  // Swapping the 32-bit halves of an i64 is a rotate by 32.
2769   } else if (Name == "swap.lo.hi.b64") {
2770     Type *Int64Ty = Builder.getInt64Ty();
2771     Value *Arg = CI->getOperand(0);
2772     Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2773                                   {Arg, Arg, Builder.getInt64(32)});
  // Generic <-> specific address-space pointer conversions map to a plain
  // addrspacecast in either direction.
2774   } else if ((Name.consume_front("ptr.gen.to.") &&
2775               consumeNVVMPtrAddrSpace(Name)) ||
2776              (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2777               Name.starts_with(".to.gen"))) {
2778     Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2779   } else if (Name.consume_front("ldg.global")) {
2780     Value *Ptr = CI->getArgOperand(0);
2781     Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2782     // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2783     Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2784     Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2785     MDNode *MD = MDNode::get(Builder.getContext(), {});
2786     LD->setMetadata(LLVMContext::MD_invariant_load, MD);
    // The invariant load itself is the replacement; return it directly.
2787     return LD;
2788   } else if (Name == "tanh.approx.f32") {
2789     // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2790     FastMathFlags FMF;
2791     FMF.setApproxFunc();
2792     Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2793                                        FMF);
  // Barrier variants: "barrier0" has an implicit id of 0; the others take it
  // as the first operand.
2794   } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2795     Value *Arg =
2796         Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2797     Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2798                                   {}, {Arg});
2799   } else if (Name == "barrier") {
2800     Rep = Builder.CreateIntrinsic(
2801         Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2802         {CI->getArgOperand(0), CI->getArgOperand(1)});
2803   } else if (Name == "barrier.sync") {
2804     Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2805                                   {CI->getArgOperand(0)});
2806   } else if (Name == "barrier.sync.cnt") {
2807     Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2808                                   {CI->getArgOperand(0), CI->getArgOperand(1)});
  // Barrier reductions: the i32 predicate operand is normalized to an i1
  // before the call, and the i1 result is zero-extended back afterwards.
2809   } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2810              Name == "barrier0.or") {
2811     Value *C = CI->getArgOperand(0);
2812     C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2813
2814     Intrinsic::ID IID =
2816             .Case("barrier0.popc",
2817                   Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2818             .Case("barrier0.and",
2819                   Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2820             .Case("barrier0.or",
2821                   Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2822     Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2823     Rep = Builder.CreateZExt(Bar, CI->getType());
2824   } else {
    // Fallback path: IID holds a direct replacement intrinsic for the
    // remaining names. The upgrade is only performed when the old return
    // type is not bfloat-based.
2826     if (IID != Intrinsic::not_intrinsic &&
2827         !F->getReturnType()->getScalarType()->isBFloatTy()) {
      // Move the old declaration out of the way, declare the replacement,
      // and forward the call.
2828       rename(F);
2829       Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2831       for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2832         Value *Arg = CI->getArgOperand(I);
2833         Type *OldType = Arg->getType();
2834         Type *NewType = NewFn->getArg(I)->getType();
        // Legacy declarations used integer types where the new intrinsic
        // expects bfloat; bitcast those arguments across.
2835         Args.push_back(
2836             (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2837                 ? Builder.CreateBitCast(Arg, NewType)
2838                 : Arg);
2839       }
2840       Rep = Builder.CreateCall(NewFn, Args);
      // If the old signature returned an integer, bitcast the result back
      // to that type so existing users remain well-typed.
2841       if (F->getReturnType()->isIntegerTy())
2842         Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2843     }
2844   }
2845
2846   return Rep;
2847 }
2848
2850 IRBuilder<> &Builder) {
2851 LLVMContext &C = F->getContext();
2852 Value *Rep = nullptr;
2853
2854 if (Name.starts_with("sse4a.movnt.")) {
2856 Elts.push_back(
2857 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2858 MDNode *Node = MDNode::get(C, Elts);
2859
2860 Value *Arg0 = CI->getArgOperand(0);
2861 Value *Arg1 = CI->getArgOperand(1);
2862
2863 // Nontemporal (unaligned) store of the 0'th element of the float/double
2864 // vector.
2865 Value *Extract =
2866 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2867
2868 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2869 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2870 } else if (Name.starts_with("avx.movnt.") ||
2871 Name.starts_with("avx512.storent.")) {
2873 Elts.push_back(
2874 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2875 MDNode *Node = MDNode::get(C, Elts);
2876
2877 Value *Arg0 = CI->getArgOperand(0);
2878 Value *Arg1 = CI->getArgOperand(1);
2879
2880 StoreInst *SI = Builder.CreateAlignedStore(
2881 Arg1, Arg0,
2883 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2884 } else if (Name == "sse2.storel.dq") {
2885 Value *Arg0 = CI->getArgOperand(0);
2886 Value *Arg1 = CI->getArgOperand(1);
2887
2888 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2889 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2890 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2891 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2892 } else if (Name.starts_with("sse.storeu.") ||
2893 Name.starts_with("sse2.storeu.") ||
2894 Name.starts_with("avx.storeu.")) {
2895 Value *Arg0 = CI->getArgOperand(0);
2896 Value *Arg1 = CI->getArgOperand(1);
2897 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2898 } else if (Name == "avx512.mask.store.ss") {
2899 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2900 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2901 Mask, false);
2902 } else if (Name.starts_with("avx512.mask.store")) {
2903 // "avx512.mask.storeu." or "avx512.mask.store."
2904 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2905 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2906 CI->getArgOperand(2), Aligned);
2907 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2908 // Upgrade packed integer vector compare intrinsics to compare instructions.
2909 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2910 bool CmpEq = Name[9] == 'e';
2911 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2912 CI->getArgOperand(0), CI->getArgOperand(1));
2913 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2914 } else if (Name.starts_with("avx512.broadcastm")) {
2915 Type *ExtTy = Type::getInt32Ty(C);
2916 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2917 ExtTy = Type::getInt64Ty(C);
2918 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2919 ExtTy->getPrimitiveSizeInBits();
2920 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2921 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2922 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2923 Value *Vec = CI->getArgOperand(0);
2924 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2925 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2926 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2927 } else if (Name.starts_with("avx.sqrt.p") ||
2928 Name.starts_with("sse2.sqrt.p") ||
2929 Name.starts_with("sse.sqrt.p")) {
2930 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2931 {CI->getArgOperand(0)});
2932 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2933 if (CI->arg_size() == 4 &&
2934 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2935 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2936 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2937 : Intrinsic::x86_avx512_sqrt_pd_512;
2938
2939 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2940 Rep = Builder.CreateIntrinsic(IID, Args);
2941 } else {
2942 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2943 {CI->getArgOperand(0)});
2944 }
2945 Rep =
2946 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2947 } else if (Name.starts_with("avx512.ptestm") ||
2948 Name.starts_with("avx512.ptestnm")) {
2949 Value *Op0 = CI->getArgOperand(0);
2950 Value *Op1 = CI->getArgOperand(1);
2951 Value *Mask = CI->getArgOperand(2);
2952 Rep = Builder.CreateAnd(Op0, Op1);
2953 llvm::Type *Ty = Op0->getType();
2955 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2958 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2959 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2960 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2961 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2962 ->getNumElements();
2963 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2964 Rep =
2965 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2966 } else if (Name.starts_with("avx512.kunpck")) {
2967 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2968 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2969 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2970 int Indices[64];
2971 for (unsigned i = 0; i != NumElts; ++i)
2972 Indices[i] = i;
2973
2974 // First extract half of each vector. This gives better codegen than
2975 // doing it in a single shuffle.
2976 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2977 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2978 // Concat the vectors.
2979 // NOTE: Operands have to be swapped to match intrinsic definition.
2980 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2981 Rep = Builder.CreateBitCast(Rep, CI->getType());
2982 } else if (Name == "avx512.kand.w") {
2983 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2984 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2985 Rep = Builder.CreateAnd(LHS, RHS);
2986 Rep = Builder.CreateBitCast(Rep, CI->getType());
2987 } else if (Name == "avx512.kandn.w") {
2988 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2989 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2990 LHS = Builder.CreateNot(LHS);
2991 Rep = Builder.CreateAnd(LHS, RHS);
2992 Rep = Builder.CreateBitCast(Rep, CI->getType());
2993 } else if (Name == "avx512.kor.w") {
2994 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2995 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2996 Rep = Builder.CreateOr(LHS, RHS);
2997 Rep = Builder.CreateBitCast(Rep, CI->getType());
2998 } else if (Name == "avx512.kxor.w") {
2999 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3000 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3001 Rep = Builder.CreateXor(LHS, RHS);
3002 Rep = Builder.CreateBitCast(Rep, CI->getType());
3003 } else if (Name == "avx512.kxnor.w") {
3004 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3005 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3006 LHS = Builder.CreateNot(LHS);
3007 Rep = Builder.CreateXor(LHS, RHS);
3008 Rep = Builder.CreateBitCast(Rep, CI->getType());
3009 } else if (Name == "avx512.knot.w") {
3010 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3011 Rep = Builder.CreateNot(Rep);
3012 Rep = Builder.CreateBitCast(Rep, CI->getType());
3013 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3014 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3015 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3016 Rep = Builder.CreateOr(LHS, RHS);
3017 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3018 Value *C;
3019 if (Name[14] == 'c')
3020 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
3021 else
3022 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3023 Rep = Builder.CreateICmpEQ(Rep, C);
3024 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3025 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3026 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3027 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3028 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3029 Type *I32Ty = Type::getInt32Ty(C);
3030 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3031 ConstantInt::get(I32Ty, 0));
3032 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3033 ConstantInt::get(I32Ty, 0));
3034 Value *EltOp;
3035 if (Name.contains(".add."))
3036 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3037 else if (Name.contains(".sub."))
3038 EltOp = Builder.CreateFSub(Elt0, Elt1);
3039 else if (Name.contains(".mul."))
3040 EltOp = Builder.CreateFMul(Elt0, Elt1);
3041 else
3042 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3043 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3044 ConstantInt::get(I32Ty, 0));
3045 } else if (Name.starts_with("avx512.mask.pcmp")) {
3046 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3047 bool CmpEq = Name[16] == 'e';
3048 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3049 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3050 Type *OpTy = CI->getArgOperand(0)->getType();
3051 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3052 Intrinsic::ID IID;
3053 switch (VecWidth) {
3054 default:
3055 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3056 break;
3057 case 128:
3058 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3059 break;
3060 case 256:
3061 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3062 break;
3063 case 512:
3064 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3065 break;
3066 }
3067
3068 Rep =
3069 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3070 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3071 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3072 Type *OpTy = CI->getArgOperand(0)->getType();
3073 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3074 unsigned EltWidth = OpTy->getScalarSizeInBits();
3075 Intrinsic::ID IID;
3076 if (VecWidth == 128 && EltWidth == 32)
3077 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3078 else if (VecWidth == 256 && EltWidth == 32)
3079 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3080 else if (VecWidth == 512 && EltWidth == 32)
3081 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3082 else if (VecWidth == 128 && EltWidth == 64)
3083 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3084 else if (VecWidth == 256 && EltWidth == 64)
3085 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3086 else if (VecWidth == 512 && EltWidth == 64)
3087 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3088 else
3089 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3090
3091 Rep =
3092 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3093 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3094 } else if (Name.starts_with("avx512.cmp.p")) {
3095 SmallVector<Value *, 4> Args(CI->args());
3096 Type *OpTy = Args[0]->getType();
3097 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3098 unsigned EltWidth = OpTy->getScalarSizeInBits();
3099 Intrinsic::ID IID;
3100 if (VecWidth == 128 && EltWidth == 32)
3101 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3102 else if (VecWidth == 256 && EltWidth == 32)
3103 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3104 else if (VecWidth == 512 && EltWidth == 32)
3105 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3106 else if (VecWidth == 128 && EltWidth == 64)
3107 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3108 else if (VecWidth == 256 && EltWidth == 64)
3109 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3110 else if (VecWidth == 512 && EltWidth == 64)
3111 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3112 else
3113 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3114
3116 if (VecWidth == 512)
3117 std::swap(Mask, Args.back());
3118 Args.push_back(Mask);
3119
3120 Rep = Builder.CreateIntrinsic(IID, Args);
3121 } else if (Name.starts_with("avx512.mask.cmp.")) {
3122 // Integer compare intrinsics.
3123 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3124 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3125 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3126 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3127 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3128 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3129 Name.starts_with("avx512.cvtw2mask.") ||
3130 Name.starts_with("avx512.cvtd2mask.") ||
3131 Name.starts_with("avx512.cvtq2mask.")) {
3132 Value *Op = CI->getArgOperand(0);
3133 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3134 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3135 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3136 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3137 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3138 Name.starts_with("avx512.mask.pabs")) {
3139 Rep = upgradeAbs(Builder, *CI);
3140 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3141 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3142 Name.starts_with("avx512.mask.pmaxs")) {
3143 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3144 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3145 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3146 Name.starts_with("avx512.mask.pmaxu")) {
3147 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3148 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3149 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3150 Name.starts_with("avx512.mask.pmins")) {
3151 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3152 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3153 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3154 Name.starts_with("avx512.mask.pminu")) {
3155 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3156 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3157 Name == "avx512.pmulu.dq.512" ||
3158 Name.starts_with("avx512.mask.pmulu.dq.")) {
3159 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3160 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3161 Name == "avx512.pmul.dq.512" ||
3162 Name.starts_with("avx512.mask.pmul.dq.")) {
3163 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3164 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3165 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3166 Rep =
3167 Builder.CreateSIToFP(CI->getArgOperand(1),
3168 cast<VectorType>(CI->getType())->getElementType());
3169 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3170 } else if (Name == "avx512.cvtusi2sd") {
3171 Rep =
3172 Builder.CreateUIToFP(CI->getArgOperand(1),
3173 cast<VectorType>(CI->getType())->getElementType());
3174 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3175 } else if (Name == "sse2.cvtss2sd") {
3176 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3177 Rep = Builder.CreateFPExt(
3178 Rep, cast<VectorType>(CI->getType())->getElementType());
3179 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3180 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3181 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3182 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3183 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3184 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3185 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3186 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3187 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3188 Name == "avx512.mask.cvtqq2ps.256" ||
3189 Name == "avx512.mask.cvtqq2ps.512" ||
3190 Name == "avx512.mask.cvtuqq2ps.256" ||
3191 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3192 Name == "avx.cvt.ps2.pd.256" ||
3193 Name == "avx512.mask.cvtps2pd.128" ||
3194 Name == "avx512.mask.cvtps2pd.256") {
3195 auto *DstTy = cast<FixedVectorType>(CI->getType());
3196 Rep = CI->getArgOperand(0);
3197 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3198
3199 unsigned NumDstElts = DstTy->getNumElements();
3200 if (NumDstElts < SrcTy->getNumElements()) {
3201 assert(NumDstElts == 2 && "Unexpected vector size");
3202 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3203 }
3204
3205 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3206 bool IsUnsigned = Name.contains("cvtu");
3207 if (IsPS2PD)
3208 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3209 else if (CI->arg_size() == 4 &&
3210 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3211 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3212 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3213 : Intrinsic::x86_avx512_sitofp_round;
3214 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3215 {Rep, CI->getArgOperand(3)});
3216 } else {
3217 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3218 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3219 }
3220
3221 if (CI->arg_size() >= 3)
3222 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3223 CI->getArgOperand(1));
3224 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3225 Name.starts_with("vcvtph2ps.")) {
3226 auto *DstTy = cast<FixedVectorType>(CI->getType());
3227 Rep = CI->getArgOperand(0);
3228 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3229 unsigned NumDstElts = DstTy->getNumElements();
3230 if (NumDstElts != SrcTy->getNumElements()) {
3231 assert(NumDstElts == 4 && "Unexpected vector size");
3232 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3233 }
3234 Rep = Builder.CreateBitCast(
3235 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3236 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3237 if (CI->arg_size() >= 3)
3238 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3239 CI->getArgOperand(1));
3240 } else if (Name.starts_with("avx512.mask.load")) {
3241 // "avx512.mask.loadu." or "avx512.mask.load."
3242 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3243 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3244 CI->getArgOperand(2), Aligned);
3245 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3246 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3247 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3248 ResultTy->getNumElements());
3249
3250 Rep = Builder.CreateIntrinsic(
3251 Intrinsic::masked_expandload, ResultTy,
3252 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3253 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3254 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3255 Value *MaskVec =
3256 getX86MaskVec(Builder, CI->getArgOperand(2),
3257 cast<FixedVectorType>(ResultTy)->getNumElements());
3258
3259 Rep = Builder.CreateIntrinsic(
3260 Intrinsic::masked_compressstore, ResultTy,
3261 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3262 } else if (Name.starts_with("avx512.mask.compress.") ||
3263 Name.starts_with("avx512.mask.expand.")) {
3264 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3265
3266 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3267 ResultTy->getNumElements());
3268
3269 bool IsCompress = Name[12] == 'c';
3270 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3271 : Intrinsic::x86_avx512_mask_expand;
3272 Rep = Builder.CreateIntrinsic(
3273 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3274 } else if (Name.starts_with("xop.vpcom")) {
3275 bool IsSigned;
3276 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3277 Name.ends_with("uq"))
3278 IsSigned = false;
3279 else if (Name.ends_with("b") || Name.ends_with("w") ||
3280 Name.ends_with("d") || Name.ends_with("q"))
3281 IsSigned = true;
3282 else
3283 reportFatalUsageErrorWithCI("Intrinsic has unknown suffix", CI);
3284
3285 unsigned Imm;
3286 if (CI->arg_size() == 3) {
3287 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3288 } else {
3289 Name = Name.substr(9); // strip off "xop.vpcom"
3290 if (Name.starts_with("lt"))
3291 Imm = 0;
3292 else if (Name.starts_with("le"))
3293 Imm = 1;
3294 else if (Name.starts_with("gt"))
3295 Imm = 2;
3296 else if (Name.starts_with("ge"))
3297 Imm = 3;
3298 else if (Name.starts_with("eq"))
3299 Imm = 4;
3300 else if (Name.starts_with("ne"))
3301 Imm = 5;
3302 else if (Name.starts_with("false"))
3303 Imm = 6;
3304 else if (Name.starts_with("true"))
3305 Imm = 7;
3306 else
3307 llvm_unreachable("Unknown condition");
3308 }
3309
3310 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3311 } else if (Name.starts_with("xop.vpcmov")) {
3312 Value *Sel = CI->getArgOperand(2);
3313 Value *NotSel = Builder.CreateNot(Sel);
3314 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3315 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3316 Rep = Builder.CreateOr(Sel0, Sel1);
3317 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3318 Name.starts_with("avx512.mask.prol")) {
3319 Rep = upgradeX86Rotate(Builder, *CI, false);
3320 } else if (Name.starts_with("avx512.pror") ||
3321 Name.starts_with("avx512.mask.pror")) {
3322 Rep = upgradeX86Rotate(Builder, *CI, true);
3323 } else if (Name.starts_with("avx512.vpshld.") ||
3324 Name.starts_with("avx512.mask.vpshld") ||
3325 Name.starts_with("avx512.maskz.vpshld")) {
3326 bool ZeroMask = Name[11] == 'z';
3327 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3328 } else if (Name.starts_with("avx512.vpshrd.") ||
3329 Name.starts_with("avx512.mask.vpshrd") ||
3330 Name.starts_with("avx512.maskz.vpshrd")) {
3331 bool ZeroMask = Name[11] == 'z';
3332 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3333 } else if (Name == "sse42.crc32.64.8") {
3334 Value *Trunc0 =
3335 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3336 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3337 {Trunc0, CI->getArgOperand(1)});
3338 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3339 } else if (Name.starts_with("avx.vbroadcast.s") ||
3340 Name.starts_with("avx512.vbroadcast.s")) {
3341 // Replace broadcasts with a series of insertelements.
3342 auto *VecTy = cast<FixedVectorType>(CI->getType());
3343 Type *EltTy = VecTy->getElementType();
3344 unsigned EltNum = VecTy->getNumElements();
3345 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3346 Type *I32Ty = Type::getInt32Ty(C);
3347 Rep = PoisonValue::get(VecTy);
3348 for (unsigned I = 0; I < EltNum; ++I)
3349 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3350 } else if (Name.starts_with("sse41.pmovsx") ||
3351 Name.starts_with("sse41.pmovzx") ||
3352 Name.starts_with("avx2.pmovsx") ||
3353 Name.starts_with("avx2.pmovzx") ||
3354 Name.starts_with("avx512.mask.pmovsx") ||
3355 Name.starts_with("avx512.mask.pmovzx")) {
3356 auto *DstTy = cast<FixedVectorType>(CI->getType());
3357 unsigned NumDstElts = DstTy->getNumElements();
3358
3359 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3360 SmallVector<int, 8> ShuffleMask(NumDstElts);
3361 for (unsigned i = 0; i != NumDstElts; ++i)
3362 ShuffleMask[i] = i;
3363
3364 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3365
3366 bool DoSext = Name.contains("pmovsx");
3367 Rep =
3368 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3369 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3370 if (CI->arg_size() == 3)
3371 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3372 CI->getArgOperand(1));
3373 } else if (Name == "avx512.mask.pmov.qd.256" ||
3374 Name == "avx512.mask.pmov.qd.512" ||
3375 Name == "avx512.mask.pmov.wb.256" ||
3376 Name == "avx512.mask.pmov.wb.512") {
3377 Type *Ty = CI->getArgOperand(1)->getType();
3378 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3379 Rep =
3380 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3381 } else if (Name.starts_with("avx.vbroadcastf128") ||
3382 Name == "avx2.vbroadcasti128") {
3383 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3384 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3385 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3386 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3387 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3388 if (NumSrcElts == 2)
3389 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3390 else
3391 Rep = Builder.CreateShuffleVector(Load,
3392 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3393 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3394 Name.starts_with("avx512.mask.shuf.f")) {
3395 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3396 Type *VT = CI->getType();
3397 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3398 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3399 unsigned ControlBitsMask = NumLanes - 1;
3400 unsigned NumControlBits = NumLanes / 2;
3401 SmallVector<int, 8> ShuffleMask(0);
3402
3403 for (unsigned l = 0; l != NumLanes; ++l) {
3404 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3405 // We actually need the other source.
3406 if (l >= NumLanes / 2)
3407 LaneMask += NumLanes;
3408 for (unsigned i = 0; i != NumElementsInLane; ++i)
3409 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3410 }
3411 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3412 CI->getArgOperand(1), ShuffleMask);
3413 Rep =
3414 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3415 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3416 Name.starts_with("avx512.mask.broadcasti")) {
3417 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3418 ->getNumElements();
3419 unsigned NumDstElts =
3420 cast<FixedVectorType>(CI->getType())->getNumElements();
3421
3422 SmallVector<int, 8> ShuffleMask(NumDstElts);
3423 for (unsigned i = 0; i != NumDstElts; ++i)
3424 ShuffleMask[i] = i % NumSrcElts;
3425
3426 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3427 CI->getArgOperand(0), ShuffleMask);
3428 Rep =
3429 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3430 } else if (Name.starts_with("avx2.pbroadcast") ||
3431 Name.starts_with("avx2.vbroadcast") ||
3432 Name.starts_with("avx512.pbroadcast") ||
3433 Name.starts_with("avx512.mask.broadcast.s")) {
3434 // Replace vp?broadcasts with a vector shuffle.
3435 Value *Op = CI->getArgOperand(0);
3436 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3437 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3440 Rep = Builder.CreateShuffleVector(Op, M);
3441
3442 if (CI->arg_size() == 3)
3443 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3444 CI->getArgOperand(1));
3445 } else if (Name.starts_with("sse2.padds.") ||
3446 Name.starts_with("avx2.padds.") ||
3447 Name.starts_with("avx512.padds.") ||
3448 Name.starts_with("avx512.mask.padds.")) {
3449 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3450 } else if (Name.starts_with("sse2.psubs.") ||
3451 Name.starts_with("avx2.psubs.") ||
3452 Name.starts_with("avx512.psubs.") ||
3453 Name.starts_with("avx512.mask.psubs.")) {
3454 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3455 } else if (Name.starts_with("sse2.paddus.") ||
3456 Name.starts_with("avx2.paddus.") ||
3457 Name.starts_with("avx512.mask.paddus.")) {
3458 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3459 } else if (Name.starts_with("sse2.psubus.") ||
3460 Name.starts_with("avx2.psubus.") ||
3461 Name.starts_with("avx512.mask.psubus.")) {
3462 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3463 } else if (Name.starts_with("avx512.mask.palignr.")) {
3464 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3465 CI->getArgOperand(1), CI->getArgOperand(2),
3466 CI->getArgOperand(3), CI->getArgOperand(4),
3467 false);
3468 } else if (Name.starts_with("avx512.mask.valign.")) {
3470 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3471 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3472 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3473 // 128/256-bit shift left specified in bits.
3474 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3475 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3476 Shift / 8); // Shift is in bits.
3477 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3478 // 128/256-bit shift right specified in bits.
3479 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3480 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3481 Shift / 8); // Shift is in bits.
3482 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3483 Name == "avx512.psll.dq.512") {
3484 // 128/256/512-bit shift left specified in bytes.
3485 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3486 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3487 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3488 Name == "avx512.psrl.dq.512") {
3489 // 128/256/512-bit shift right specified in bytes.
3490 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3491 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3492 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3493 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3494 Name.starts_with("avx2.pblendd.")) {
3495 Value *Op0 = CI->getArgOperand(0);
3496 Value *Op1 = CI->getArgOperand(1);
3497 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3498 auto *VecTy = cast<FixedVectorType>(CI->getType());
3499 unsigned NumElts = VecTy->getNumElements();
3500
3501 SmallVector<int, 16> Idxs(NumElts);
3502 for (unsigned i = 0; i != NumElts; ++i)
3503 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3504
3505 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3506 } else if (Name.starts_with("avx.vinsertf128.") ||
3507 Name == "avx2.vinserti128" ||
3508 Name.starts_with("avx512.mask.insert")) {
3509 Value *Op0 = CI->getArgOperand(0);
3510 Value *Op1 = CI->getArgOperand(1);
3511 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3512 unsigned DstNumElts =
3513 cast<FixedVectorType>(CI->getType())->getNumElements();
3514 unsigned SrcNumElts =
3515 cast<FixedVectorType>(Op1->getType())->getNumElements();
3516 unsigned Scale = DstNumElts / SrcNumElts;
3517
3518 // Mask off the high bits of the immediate value; hardware ignores those.
3519 Imm = Imm % Scale;
3520
3521 // Extend the second operand into a vector the size of the destination.
3522 SmallVector<int, 8> Idxs(DstNumElts);
3523 for (unsigned i = 0; i != SrcNumElts; ++i)
3524 Idxs[i] = i;
3525 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3526 Idxs[i] = SrcNumElts;
3527 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3528
3529 // Insert the second operand into the first operand.
3530
3531 // Note that there is no guarantee that instruction lowering will actually
3532 // produce a vinsertf128 instruction for the created shuffles. In
3533 // particular, the 0 immediate case involves no lane changes, so it can
3534 // be handled as a blend.
3535
3536 // Example of shuffle mask for 32-bit elements:
3537 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3538 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3539
3540 // First fill with identity mask.
3541 for (unsigned i = 0; i != DstNumElts; ++i)
3542 Idxs[i] = i;
3543 // Then replace the elements where we need to insert.
3544 for (unsigned i = 0; i != SrcNumElts; ++i)
3545 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3546 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3547
3548 // If the intrinsic has a mask operand, handle that.
3549 if (CI->arg_size() == 5)
3550 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3551 CI->getArgOperand(3));
3552 } else if (Name.starts_with("avx.vextractf128.") ||
3553 Name == "avx2.vextracti128" ||
3554 Name.starts_with("avx512.mask.vextract")) {
3555 Value *Op0 = CI->getArgOperand(0);
3556 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3557 unsigned DstNumElts =
3558 cast<FixedVectorType>(CI->getType())->getNumElements();
3559 unsigned SrcNumElts =
3560 cast<FixedVectorType>(Op0->getType())->getNumElements();
3561 unsigned Scale = SrcNumElts / DstNumElts;
3562
3563 // Mask off the high bits of the immediate value; hardware ignores those.
3564 Imm = Imm % Scale;
3565
3566 // Get indexes for the subvector of the input vector.
3567 SmallVector<int, 8> Idxs(DstNumElts);
3568 for (unsigned i = 0; i != DstNumElts; ++i) {
3569 Idxs[i] = i + (Imm * DstNumElts);
3570 }
3571 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3572
3573 // If the intrinsic has a mask operand, handle that.
3574 if (CI->arg_size() == 4)
3575 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3576 CI->getArgOperand(2));
3577 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3578 Name.starts_with("avx512.mask.perm.di.")) {
3579 Value *Op0 = CI->getArgOperand(0);
3580 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3581 auto *VecTy = cast<FixedVectorType>(CI->getType());
3582 unsigned NumElts = VecTy->getNumElements();
3583
3584 SmallVector<int, 8> Idxs(NumElts);
3585 for (unsigned i = 0; i != NumElts; ++i)
3586 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3587
3588 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3589
3590 if (CI->arg_size() == 4)
3591 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3592 CI->getArgOperand(2));
3593 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3594 // The immediate permute control byte looks like this:
3595 // [1:0] - select 128 bits from sources for low half of destination
3596 // [2] - ignore
3597 // [3] - zero low half of destination
3598 // [5:4] - select 128 bits from sources for high half of destination
3599 // [6] - ignore
3600 // [7] - zero high half of destination
3601
3602 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3603
3604 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3605 unsigned HalfSize = NumElts / 2;
3606 SmallVector<int, 8> ShuffleMask(NumElts);
3607
3608 // Determine which operand(s) are actually in use for this instruction.
3609 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3610 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3611
3612 // If needed, replace operands based on zero mask.
3613 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3614 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3615
3616 // Permute low half of result.
3617 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3618 for (unsigned i = 0; i < HalfSize; ++i)
3619 ShuffleMask[i] = StartIndex + i;
3620
3621 // Permute high half of result.
3622 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3623 for (unsigned i = 0; i < HalfSize; ++i)
3624 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3625
3626 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3627
3628 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3629 Name.starts_with("avx512.mask.vpermil.p") ||
3630 Name.starts_with("avx512.mask.pshuf.d.")) {
3631 Value *Op0 = CI->getArgOperand(0);
3632 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3633 auto *VecTy = cast<FixedVectorType>(CI->getType());
3634 unsigned NumElts = VecTy->getNumElements();
3635 // Calculate the size of each index in the immediate.
3636 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3637 unsigned IdxMask = ((1 << IdxSize) - 1);
3638
3639 SmallVector<int, 8> Idxs(NumElts);
3640 // Lookup the bits for this element, wrapping around the immediate every
3641 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3642 // to offset by the first index of each group.
3643 for (unsigned i = 0; i != NumElts; ++i)
3644 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3645
3646 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3647
3648 if (CI->arg_size() == 4)
3649 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3650 CI->getArgOperand(2));
3651 } else if (Name == "sse2.pshufl.w" ||
3652 Name.starts_with("avx512.mask.pshufl.w.")) {
3653 Value *Op0 = CI->getArgOperand(0);
3654 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3655 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3656
3657 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3658 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3659
3660 SmallVector<int, 16> Idxs(NumElts);
3661 for (unsigned l = 0; l != NumElts; l += 8) {
3662 for (unsigned i = 0; i != 4; ++i)
3663 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3664 for (unsigned i = 4; i != 8; ++i)
3665 Idxs[i + l] = i + l;
3666 }
3667
3668 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3669
3670 if (CI->arg_size() == 4)
3671 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3672 CI->getArgOperand(2));
3673 } else if (Name == "sse2.pshufh.w" ||
3674 Name.starts_with("avx512.mask.pshufh.w.")) {
3675 Value *Op0 = CI->getArgOperand(0);
3676 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3677 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3678
3679 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3680 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3681
3682 SmallVector<int, 16> Idxs(NumElts);
3683 for (unsigned l = 0; l != NumElts; l += 8) {
3684 for (unsigned i = 0; i != 4; ++i)
3685 Idxs[i + l] = i + l;
3686 for (unsigned i = 0; i != 4; ++i)
3687 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3688 }
3689
3690 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3691
3692 if (CI->arg_size() == 4)
3693 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3694 CI->getArgOperand(2));
3695 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3696 Value *Op0 = CI->getArgOperand(0);
3697 Value *Op1 = CI->getArgOperand(1);
3698 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3699 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3700
3701 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3702 unsigned HalfLaneElts = NumLaneElts / 2;
3703
3704 SmallVector<int, 16> Idxs(NumElts);
3705 for (unsigned i = 0; i != NumElts; ++i) {
3706 // Base index is the starting element of the lane.
3707 Idxs[i] = i - (i % NumLaneElts);
3708 // If we are half way through the lane switch to the other source.
3709 if ((i % NumLaneElts) >= HalfLaneElts)
3710 Idxs[i] += NumElts;
3711 // Now select the specific element by adding HalfLaneElts bits from
3712 // the immediate, wrapping around the immediate every 8 bits.
3713 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3714 }
3715
3716 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3717
3718 Rep =
3719 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3720 } else if (Name.starts_with("avx512.mask.movddup") ||
3721 Name.starts_with("avx512.mask.movshdup") ||
3722 Name.starts_with("avx512.mask.movsldup")) {
3723 Value *Op0 = CI->getArgOperand(0);
3724 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3725 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3726
3727 unsigned Offset = 0;
3728 if (Name.starts_with("avx512.mask.movshdup."))
3729 Offset = 1;
3730
3731 SmallVector<int, 16> Idxs(NumElts);
3732 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3733 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3734 Idxs[i + l + 0] = i + l + Offset;
3735 Idxs[i + l + 1] = i + l + Offset;
3736 }
3737
3738 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3739
3740 Rep =
3741 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3742 } else if (Name.starts_with("avx512.mask.punpckl") ||
3743 Name.starts_with("avx512.mask.unpckl.")) {
3744 Value *Op0 = CI->getArgOperand(0);
3745 Value *Op1 = CI->getArgOperand(1);
3746 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3747 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3748
3749 SmallVector<int, 64> Idxs(NumElts);
3750 for (int l = 0; l != NumElts; l += NumLaneElts)
3751 for (int i = 0; i != NumLaneElts; ++i)
3752 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3753
3754 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3755
3756 Rep =
3757 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3758 } else if (Name.starts_with("avx512.mask.punpckh") ||
3759 Name.starts_with("avx512.mask.unpckh.")) {
3760 Value *Op0 = CI->getArgOperand(0);
3761 Value *Op1 = CI->getArgOperand(1);
3762 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3763 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3764
3765 SmallVector<int, 64> Idxs(NumElts);
3766 for (int l = 0; l != NumElts; l += NumLaneElts)
3767 for (int i = 0; i != NumLaneElts; ++i)
3768 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3769
3770 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3771
3772 Rep =
3773 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3774 } else if (Name.starts_with("avx512.mask.and.") ||
3775 Name.starts_with("avx512.mask.pand.")) {
3776 VectorType *FTy = cast<VectorType>(CI->getType());
3778 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3779 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3780 Rep = Builder.CreateBitCast(Rep, FTy);
3781 Rep =
3782 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3783 } else if (Name.starts_with("avx512.mask.andn.") ||
3784 Name.starts_with("avx512.mask.pandn.")) {
3785 VectorType *FTy = cast<VectorType>(CI->getType());
3787 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3788 Rep = Builder.CreateAnd(Rep,
3789 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3790 Rep = Builder.CreateBitCast(Rep, FTy);
3791 Rep =
3792 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3793 } else if (Name.starts_with("avx512.mask.or.") ||
3794 Name.starts_with("avx512.mask.por.")) {
3795 VectorType *FTy = cast<VectorType>(CI->getType());
3797 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3798 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3799 Rep = Builder.CreateBitCast(Rep, FTy);
3800 Rep =
3801 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3802 } else if (Name.starts_with("avx512.mask.xor.") ||
3803 Name.starts_with("avx512.mask.pxor.")) {
3804 VectorType *FTy = cast<VectorType>(CI->getType());
3806 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3807 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3808 Rep = Builder.CreateBitCast(Rep, FTy);
3809 Rep =
3810 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3811 } else if (Name.starts_with("avx512.mask.padd.")) {
3812 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3813 Rep =
3814 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3815 } else if (Name.starts_with("avx512.mask.psub.")) {
3816 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3817 Rep =
3818 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3819 } else if (Name.starts_with("avx512.mask.pmull.")) {
3820 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3821 Rep =
3822 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3823 } else if (Name.starts_with("avx512.mask.add.p")) {
3824 if (Name.ends_with(".512")) {
3825 Intrinsic::ID IID;
3826 if (Name[17] == 's')
3827 IID = Intrinsic::x86_avx512_add_ps_512;
3828 else
3829 IID = Intrinsic::x86_avx512_add_pd_512;
3830
3831 Rep = Builder.CreateIntrinsic(
3832 IID,
3833 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3834 } else {
3835 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3836 }
3837 Rep =
3838 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3839 } else if (Name.starts_with("avx512.mask.div.p")) {
3840 if (Name.ends_with(".512")) {
3841 Intrinsic::ID IID;
3842 if (Name[17] == 's')
3843 IID = Intrinsic::x86_avx512_div_ps_512;
3844 else
3845 IID = Intrinsic::x86_avx512_div_pd_512;
3846
3847 Rep = Builder.CreateIntrinsic(
3848 IID,
3849 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3850 } else {
3851 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3852 }
3853 Rep =
3854 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3855 } else if (Name.starts_with("avx512.mask.mul.p")) {
3856 if (Name.ends_with(".512")) {
3857 Intrinsic::ID IID;
3858 if (Name[17] == 's')
3859 IID = Intrinsic::x86_avx512_mul_ps_512;
3860 else
3861 IID = Intrinsic::x86_avx512_mul_pd_512;
3862
3863 Rep = Builder.CreateIntrinsic(
3864 IID,
3865 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3866 } else {
3867 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3868 }
3869 Rep =
3870 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3871 } else if (Name.starts_with("avx512.mask.sub.p")) {
3872 if (Name.ends_with(".512")) {
3873 Intrinsic::ID IID;
3874 if (Name[17] == 's')
3875 IID = Intrinsic::x86_avx512_sub_ps_512;
3876 else
3877 IID = Intrinsic::x86_avx512_sub_pd_512;
3878
3879 Rep = Builder.CreateIntrinsic(
3880 IID,
3881 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3882 } else {
3883 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3884 }
3885 Rep =
3886 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3887 } else if ((Name.starts_with("avx512.mask.max.p") ||
3888 Name.starts_with("avx512.mask.min.p")) &&
3889 Name.drop_front(18) == ".512") {
3890 bool IsDouble = Name[17] == 'd';
3891 bool IsMin = Name[13] == 'i';
3892 static const Intrinsic::ID MinMaxTbl[2][2] = {
3893 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3894 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3895 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3896
3897 Rep = Builder.CreateIntrinsic(
3898 IID,
3899 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3900 Rep =
3901 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3902 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3903 Rep =
3904 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3905 {CI->getArgOperand(0), Builder.getInt1(false)});
3906 Rep =
3907 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3908 } else if (Name.starts_with("avx512.mask.psll")) {
3909 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3910 bool IsVariable = Name[16] == 'v';
3911 char Size = Name[16] == '.' ? Name[17]
3912 : Name[17] == '.' ? Name[18]
3913 : Name[18] == '.' ? Name[19]
3914 : Name[20];
3915
3916 Intrinsic::ID IID;
3917 if (IsVariable && Name[17] != '.') {
3918 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3919 IID = Intrinsic::x86_avx2_psllv_q;
3920 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3921 IID = Intrinsic::x86_avx2_psllv_q_256;
3922 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3923 IID = Intrinsic::x86_avx2_psllv_d;
3924 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3925 IID = Intrinsic::x86_avx2_psllv_d_256;
3926 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3927 IID = Intrinsic::x86_avx512_psllv_w_128;
3928 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3929 IID = Intrinsic::x86_avx512_psllv_w_256;
3930 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3931 IID = Intrinsic::x86_avx512_psllv_w_512;
3932 else
3933 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3934 } else if (Name.ends_with(".128")) {
3935 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3936 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3937 : Intrinsic::x86_sse2_psll_d;
3938 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3939 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3940 : Intrinsic::x86_sse2_psll_q;
3941 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3942 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3943 : Intrinsic::x86_sse2_psll_w;
3944 else
3945 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3946 } else if (Name.ends_with(".256")) {
3947 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3948 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3949 : Intrinsic::x86_avx2_psll_d;
3950 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3951 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3952 : Intrinsic::x86_avx2_psll_q;
3953 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3954 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3955 : Intrinsic::x86_avx2_psll_w;
3956 else
3957 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3958 } else {
3959 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3960 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3961 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3962 : Intrinsic::x86_avx512_psll_d_512;
3963 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3964 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3965 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3966 : Intrinsic::x86_avx512_psll_q_512;
3967 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3968 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3969 : Intrinsic::x86_avx512_psll_w_512;
3970 else
3971 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3972 }
3973
3974 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3975 } else if (Name.starts_with("avx512.mask.psrl")) {
3976 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3977 bool IsVariable = Name[16] == 'v';
3978 char Size = Name[16] == '.' ? Name[17]
3979 : Name[17] == '.' ? Name[18]
3980 : Name[18] == '.' ? Name[19]
3981 : Name[20];
3982
3983 Intrinsic::ID IID;
3984 if (IsVariable && Name[17] != '.') {
3985 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3986 IID = Intrinsic::x86_avx2_psrlv_q;
3987 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3988 IID = Intrinsic::x86_avx2_psrlv_q_256;
3989 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3990 IID = Intrinsic::x86_avx2_psrlv_d;
3991 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3992 IID = Intrinsic::x86_avx2_psrlv_d_256;
3993 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3994 IID = Intrinsic::x86_avx512_psrlv_w_128;
3995 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3996 IID = Intrinsic::x86_avx512_psrlv_w_256;
3997 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3998 IID = Intrinsic::x86_avx512_psrlv_w_512;
3999 else
4000 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4001 } else if (Name.ends_with(".128")) {
4002 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
4003 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4004 : Intrinsic::x86_sse2_psrl_d;
4005 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
4006 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4007 : Intrinsic::x86_sse2_psrl_q;
4008 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
4009 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4010 : Intrinsic::x86_sse2_psrl_w;
4011 else
4012 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4013 } else if (Name.ends_with(".256")) {
4014 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4015 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4016 : Intrinsic::x86_avx2_psrl_d;
4017 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4018 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4019 : Intrinsic::x86_avx2_psrl_q;
4020 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4021 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4022 : Intrinsic::x86_avx2_psrl_w;
4023 else
4024 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4025 } else {
4026 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4027 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4028 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4029 : Intrinsic::x86_avx512_psrl_d_512;
4030 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4031 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4032 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4033 : Intrinsic::x86_avx512_psrl_q_512;
4034 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
4035 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4036 : Intrinsic::x86_avx512_psrl_w_512;
4037 else
4038 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4039 }
4040
4041 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4042 } else if (Name.starts_with("avx512.mask.psra")) {
4043 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4044 bool IsVariable = Name[16] == 'v';
4045 char Size = Name[16] == '.' ? Name[17]
4046 : Name[17] == '.' ? Name[18]
4047 : Name[18] == '.' ? Name[19]
4048 : Name[20];
4049
4050 Intrinsic::ID IID;
4051 if (IsVariable && Name[17] != '.') {
4052 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4053 IID = Intrinsic::x86_avx2_psrav_d;
4054 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4055 IID = Intrinsic::x86_avx2_psrav_d_256;
4056 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4057 IID = Intrinsic::x86_avx512_psrav_w_128;
4058 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4059 IID = Intrinsic::x86_avx512_psrav_w_256;
4060 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4061 IID = Intrinsic::x86_avx512_psrav_w_512;
4062 else
4063 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4064 } else if (Name.ends_with(".128")) {
4065 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4066 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4067 : Intrinsic::x86_sse2_psra_d;
4068 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4069 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4070 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4071 : Intrinsic::x86_avx512_psra_q_128;
4072 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4073 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4074 : Intrinsic::x86_sse2_psra_w;
4075 else
4076 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4077 } else if (Name.ends_with(".256")) {
4078 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4079 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4080 : Intrinsic::x86_avx2_psra_d;
4081 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4082 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4083 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4084 : Intrinsic::x86_avx512_psra_q_256;
4085 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4086 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4087 : Intrinsic::x86_avx2_psra_w;
4088 else
4089 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4090 } else {
4091 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4092 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4093 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4094 : Intrinsic::x86_avx512_psra_d_512;
4095 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4096 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4097 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4098 : Intrinsic::x86_avx512_psra_q_512;
4099 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4100 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4101 : Intrinsic::x86_avx512_psra_w_512;
4102 else
4103 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4104 }
4105
4106 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4107 } else if (Name.starts_with("avx512.mask.move.s")) {
4108 Rep = upgradeMaskedMove(Builder, *CI);
4109 } else if (Name.starts_with("avx512.cvtmask2")) {
4110 Rep = upgradeMaskToInt(Builder, *CI);
4111 } else if (Name.ends_with(".movntdqa")) {
4113 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4114
4115 LoadInst *LI = Builder.CreateAlignedLoad(
4116 CI->getType(), CI->getArgOperand(0),
4118 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4119 Rep = LI;
4120 } else if (Name.starts_with("fma.vfmadd.") ||
4121 Name.starts_with("fma.vfmsub.") ||
4122 Name.starts_with("fma.vfnmadd.") ||
4123 Name.starts_with("fma.vfnmsub.")) {
4124 bool NegMul = Name[6] == 'n';
4125 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4126 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4127
4128 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4129 CI->getArgOperand(2)};
4130
4131 if (IsScalar) {
4132 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4133 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4134 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4135 }
4136
4137 if (NegMul && !IsScalar)
4138 Ops[0] = Builder.CreateFNeg(Ops[0]);
4139 if (NegMul && IsScalar)
4140 Ops[1] = Builder.CreateFNeg(Ops[1]);
4141 if (NegAcc)
4142 Ops[2] = Builder.CreateFNeg(Ops[2]);
4143
4144 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4145
4146 if (IsScalar)
4147 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4148 } else if (Name.starts_with("fma4.vfmadd.s")) {
4149 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4150 CI->getArgOperand(2)};
4151
4152 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4153 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4154 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4155
4156 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4157
4158 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4159 Rep, (uint64_t)0);
4160 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4161 Name.starts_with("avx512.maskz.vfmadd.s") ||
4162 Name.starts_with("avx512.mask3.vfmadd.s") ||
4163 Name.starts_with("avx512.mask3.vfmsub.s") ||
4164 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4165 bool IsMask3 = Name[11] == '3';
4166 bool IsMaskZ = Name[11] == 'z';
4167 // Drop the "avx512.mask." to make it easier.
4168 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4169 bool NegMul = Name[2] == 'n';
4170 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4171
4172 Value *A = CI->getArgOperand(0);
4173 Value *B = CI->getArgOperand(1);
4174 Value *C = CI->getArgOperand(2);
4175
4176 if (NegMul && (IsMask3 || IsMaskZ))
4177 A = Builder.CreateFNeg(A);
4178 if (NegMul && !(IsMask3 || IsMaskZ))
4179 B = Builder.CreateFNeg(B);
4180 if (NegAcc)
4181 C = Builder.CreateFNeg(C);
4182
4183 A = Builder.CreateExtractElement(A, (uint64_t)0);
4184 B = Builder.CreateExtractElement(B, (uint64_t)0);
4185 C = Builder.CreateExtractElement(C, (uint64_t)0);
4186
4187 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4188 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4189 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4190
4191 Intrinsic::ID IID;
4192 if (Name.back() == 'd')
4193 IID = Intrinsic::x86_avx512_vfmadd_f64;
4194 else
4195 IID = Intrinsic::x86_avx512_vfmadd_f32;
4196 Rep = Builder.CreateIntrinsic(IID, Ops);
4197 } else {
4198 Rep = Builder.CreateFMA(A, B, C);
4199 }
4200
4201 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4202 : IsMask3 ? C
4203 : A;
4204
4205 // For Mask3 with NegAcc, we need to create a new extractelement that
4206 // avoids the negation above.
4207 if (NegAcc && IsMask3)
4208 PassThru =
4209 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4210
4211 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4212 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4213 (uint64_t)0);
4214 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4215 Name.starts_with("avx512.mask.vfnmadd.p") ||
4216 Name.starts_with("avx512.mask.vfnmsub.p") ||
4217 Name.starts_with("avx512.mask3.vfmadd.p") ||
4218 Name.starts_with("avx512.mask3.vfmsub.p") ||
4219 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4220 Name.starts_with("avx512.maskz.vfmadd.p")) {
4221 bool IsMask3 = Name[11] == '3';
4222 bool IsMaskZ = Name[11] == 'z';
4223 // Drop the "avx512.mask." to make it easier.
4224 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4225 bool NegMul = Name[2] == 'n';
4226 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4227
4228 Value *A = CI->getArgOperand(0);
4229 Value *B = CI->getArgOperand(1);
4230 Value *C = CI->getArgOperand(2);
4231
4232 if (NegMul && (IsMask3 || IsMaskZ))
4233 A = Builder.CreateFNeg(A);
4234 if (NegMul && !(IsMask3 || IsMaskZ))
4235 B = Builder.CreateFNeg(B);
4236 if (NegAcc)
4237 C = Builder.CreateFNeg(C);
4238
4239 if (CI->arg_size() == 5 &&
4240 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4241 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4242 Intrinsic::ID IID;
4243 // Check the character before ".512" in string.
4244 if (Name[Name.size() - 5] == 's')
4245 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4246 else
4247 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4248
4249 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4250 } else {
4251 Rep = Builder.CreateFMA(A, B, C);
4252 }
4253
4254 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4255 : IsMask3 ? CI->getArgOperand(2)
4256 : CI->getArgOperand(0);
4257
4258 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4259 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4260 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4261 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4262 Intrinsic::ID IID;
4263 if (VecWidth == 128 && EltWidth == 32)
4264 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4265 else if (VecWidth == 256 && EltWidth == 32)
4266 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4267 else if (VecWidth == 128 && EltWidth == 64)
4268 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4269 else if (VecWidth == 256 && EltWidth == 64)
4270 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4271 else
4272 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4273
4274 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4275 CI->getArgOperand(2)};
4276 Ops[2] = Builder.CreateFNeg(Ops[2]);
4277 Rep = Builder.CreateIntrinsic(IID, Ops);
4278 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4279 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4280 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4281 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4282 bool IsMask3 = Name[11] == '3';
4283 bool IsMaskZ = Name[11] == 'z';
4284 // Drop the "avx512.mask." to make it easier.
4285 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4286 bool IsSubAdd = Name[3] == 's';
4287 if (CI->arg_size() == 5) {
4288 Intrinsic::ID IID;
4289 // Check the character before ".512" in string.
4290 if (Name[Name.size() - 5] == 's')
4291 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4292 else
4293 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4294
4295 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4296 CI->getArgOperand(2), CI->getArgOperand(4)};
4297 if (IsSubAdd)
4298 Ops[2] = Builder.CreateFNeg(Ops[2]);
4299
4300 Rep = Builder.CreateIntrinsic(IID, Ops);
4301 } else {
4302 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4303
4304 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4305 CI->getArgOperand(2)};
4306
4308 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4309 Value *Odd = Builder.CreateCall(FMA, Ops);
4310 Ops[2] = Builder.CreateFNeg(Ops[2]);
4311 Value *Even = Builder.CreateCall(FMA, Ops);
4312
4313 if (IsSubAdd)
4314 std::swap(Even, Odd);
4315
4316 SmallVector<int, 32> Idxs(NumElts);
4317 for (int i = 0; i != NumElts; ++i)
4318 Idxs[i] = i + (i % 2) * NumElts;
4319
4320 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4321 }
4322
4323 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4324 : IsMask3 ? CI->getArgOperand(2)
4325 : CI->getArgOperand(0);
4326
4327 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4328 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4329 Name.starts_with("avx512.maskz.pternlog.")) {
4330 bool ZeroMask = Name[11] == 'z';
4331 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4332 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4333 Intrinsic::ID IID;
4334 if (VecWidth == 128 && EltWidth == 32)
4335 IID = Intrinsic::x86_avx512_pternlog_d_128;
4336 else if (VecWidth == 256 && EltWidth == 32)
4337 IID = Intrinsic::x86_avx512_pternlog_d_256;
4338 else if (VecWidth == 512 && EltWidth == 32)
4339 IID = Intrinsic::x86_avx512_pternlog_d_512;
4340 else if (VecWidth == 128 && EltWidth == 64)
4341 IID = Intrinsic::x86_avx512_pternlog_q_128;
4342 else if (VecWidth == 256 && EltWidth == 64)
4343 IID = Intrinsic::x86_avx512_pternlog_q_256;
4344 else if (VecWidth == 512 && EltWidth == 64)
4345 IID = Intrinsic::x86_avx512_pternlog_q_512;
4346 else
4347 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4348
4349 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4350 CI->getArgOperand(2), CI->getArgOperand(3)};
4351 Rep = Builder.CreateIntrinsic(IID, Args);
4352 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4353 : CI->getArgOperand(0);
4354 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4355 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4356 Name.starts_with("avx512.maskz.vpmadd52")) {
4357 bool ZeroMask = Name[11] == 'z';
4358 bool High = Name[20] == 'h' || Name[21] == 'h';
4359 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4360 Intrinsic::ID IID;
4361 if (VecWidth == 128 && !High)
4362 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4363 else if (VecWidth == 256 && !High)
4364 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4365 else if (VecWidth == 512 && !High)
4366 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4367 else if (VecWidth == 128 && High)
4368 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4369 else if (VecWidth == 256 && High)
4370 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4371 else if (VecWidth == 512 && High)
4372 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4373 else
4374 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4375
4376 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4377 CI->getArgOperand(2)};
4378 Rep = Builder.CreateIntrinsic(IID, Args);
4379 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4380 : CI->getArgOperand(0);
4381 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4382 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4383 Name.starts_with("avx512.mask.vpermt2var.") ||
4384 Name.starts_with("avx512.maskz.vpermt2var.")) {
4385 bool ZeroMask = Name[11] == 'z';
4386 bool IndexForm = Name[17] == 'i';
4387 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4388 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4389 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4390 Name.starts_with("avx512.mask.vpdpbusds.") ||
4391 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4392 bool ZeroMask = Name[11] == 'z';
4393 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4394 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4395 Intrinsic::ID IID;
4396 if (VecWidth == 128 && !IsSaturating)
4397 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4398 else if (VecWidth == 256 && !IsSaturating)
4399 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4400 else if (VecWidth == 512 && !IsSaturating)
4401 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4402 else if (VecWidth == 128 && IsSaturating)
4403 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4404 else if (VecWidth == 256 && IsSaturating)
4405 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4406 else if (VecWidth == 512 && IsSaturating)
4407 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4408 else
4409 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4410
4411 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4412 CI->getArgOperand(2)};
4413
4414 // Input arguments types were incorrectly set to vectors of i32 before but
4415 // they should be vectors of i8. Insert bit cast when encountering the old
4416 // types
4417 if (Args[1]->getType()->isVectorTy() &&
4418 cast<VectorType>(Args[1]->getType())
4419 ->getElementType()
4420 ->isIntegerTy(32) &&
4421 Args[2]->getType()->isVectorTy() &&
4422 cast<VectorType>(Args[2]->getType())
4423 ->getElementType()
4424 ->isIntegerTy(32)) {
4425 Type *NewArgType = nullptr;
4426 if (VecWidth == 128)
4427 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4428 else if (VecWidth == 256)
4429 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4430 else if (VecWidth == 512)
4431 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4432 else
4433 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4434 CI);
4435
4436 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4437 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4438 }
4439
4440 Rep = Builder.CreateIntrinsic(IID, Args);
4441 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4442 : CI->getArgOperand(0);
4443 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4444 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4445 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4446 Name.starts_with("avx512.mask.vpdpwssds.") ||
4447 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4448 bool ZeroMask = Name[11] == 'z';
4449 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4450 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4451 Intrinsic::ID IID;
4452 if (VecWidth == 128 && !IsSaturating)
4453 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4454 else if (VecWidth == 256 && !IsSaturating)
4455 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4456 else if (VecWidth == 512 && !IsSaturating)
4457 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4458 else if (VecWidth == 128 && IsSaturating)
4459 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4460 else if (VecWidth == 256 && IsSaturating)
4461 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4462 else if (VecWidth == 512 && IsSaturating)
4463 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4464 else
4465 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4466
4467 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4468 CI->getArgOperand(2)};
4469
4470 // Input arguments types were incorrectly set to vectors of i32 before but
4471 // they should be vectors of i16. Insert bit cast when encountering the old
4472 // types
4473 if (Args[1]->getType()->isVectorTy() &&
4474 cast<VectorType>(Args[1]->getType())
4475 ->getElementType()
4476 ->isIntegerTy(32) &&
4477 Args[2]->getType()->isVectorTy() &&
4478 cast<VectorType>(Args[2]->getType())
4479 ->getElementType()
4480 ->isIntegerTy(32)) {
4481 Type *NewArgType = nullptr;
4482 if (VecWidth == 128)
4483 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4484 else if (VecWidth == 256)
4485 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4486 else if (VecWidth == 512)
4487 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4488 else
4489 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4490 CI);
4491
4492 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4493 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4494 }
4495
4496 Rep = Builder.CreateIntrinsic(IID, Args);
4497 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4498 : CI->getArgOperand(0);
4499 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4500 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4501 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4502 Name == "subborrow.u32" || Name == "subborrow.u64") {
4503 Intrinsic::ID IID;
4504 if (Name[0] == 'a' && Name.back() == '2')
4505 IID = Intrinsic::x86_addcarry_32;
4506 else if (Name[0] == 'a' && Name.back() == '4')
4507 IID = Intrinsic::x86_addcarry_64;
4508 else if (Name[0] == 's' && Name.back() == '2')
4509 IID = Intrinsic::x86_subborrow_32;
4510 else if (Name[0] == 's' && Name.back() == '4')
4511 IID = Intrinsic::x86_subborrow_64;
4512 else
4513 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4514
4515 // Make a call with 3 operands.
4516 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4517 CI->getArgOperand(2)};
4518 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4519
4520 // Extract the second result and store it.
4521 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4522 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4523 // Replace the original call result with the first result of the new call.
4524 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4525
4526 CI->replaceAllUsesWith(CF);
4527 Rep = nullptr;
4528 } else if (Name.starts_with("avx512.mask.") &&
4529 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4530 // Rep will be updated by the call in the condition.
4531 } else
4532 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4533
4534 return Rep;
4535}
4536
4538 Function *F, IRBuilder<> &Builder) {
4539 if (Name.starts_with("neon.bfcvt")) {
4540 if (Name.starts_with("neon.bfcvtn2")) {
4541 SmallVector<int, 32> LoMask(4);
4542 std::iota(LoMask.begin(), LoMask.end(), 0);
4543 SmallVector<int, 32> ConcatMask(8);
4544 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4545 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4546 Value *Trunc =
4547 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4548 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4549 } else if (Name.starts_with("neon.bfcvtn")) {
4550 SmallVector<int, 32> ConcatMask(8);
4551 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4552 Type *V4BF16 =
4553 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4554 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4555 dbgs() << "Trunc: " << *Trunc << "\n";
4556 return Builder.CreateShuffleVector(
4557 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4558 } else {
4559 return Builder.CreateFPTrunc(CI->getOperand(0),
4560 Type::getBFloatTy(F->getContext()));
4561 }
4562 } else if (Name.starts_with("sve.fcvt")) {
4563 Intrinsic::ID NewID =
4565 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4566 .Case("sve.fcvtnt.bf16f32",
4567 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4569 if (NewID == Intrinsic::not_intrinsic)
4570 llvm_unreachable("Unhandled Intrinsic!");
4571
4572 SmallVector<Value *, 3> Args(CI->args());
4573
4574 // The original intrinsics incorrectly used a predicate based on the
4575 // smallest element type rather than the largest.
4576 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4577 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4578
4579 if (Args[1]->getType() != BadPredTy)
4580 llvm_unreachable("Unexpected predicate type!");
4581
4582 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4583 BadPredTy, Args[1]);
4584 Args[1] = Builder.CreateIntrinsic(
4585 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4586
4587 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4588 CI->getName());
4589 }
4590
4591 llvm_unreachable("Unhandled Intrinsic!");
4592}
4593
                                      IRBuilder<> &Builder) {
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
                                          CI->getArgOperand(0),
                                          /*FMFSource=*/nullptr, CI->getName());
    Value *C1 = Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    // These intrinsics were declared with the wrong v4i1 predicate type; the
    // same intrinsic IDs are re-emitted below with v2i1 overloads instead.
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    // Rebuild the overload type list for each intrinsic shape, substituting
    // V2I1Ty for the old predicate type.
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    // Convert any i1-vector (predicate) argument from the old 4-lane form to
    // v2i1 by round-tripping through the MVE predicate<->int intrinsics.
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
        Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
      }
      Ops.push_back(Op);
    }

    return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
                                   CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
4678
4679// These are expected to have the arguments:
4680// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4681//
4682// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4683//
4685 Function *F, IRBuilder<> &Builder) {
4686 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4687 // for compatibility.
4688 auto UpgradeLegacyWMMAIUIntrinsicCall =
4689 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4690 ArrayRef<Type *> OverloadTys) -> Value * {
4691 // Prepare arguments, append clamp=0 for compatibility
4692 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4693 Args.push_back(Builder.getFalse());
4694
4695 // Insert the declaration for the right overload types
4697 F->getParent(), F->getIntrinsicID(), OverloadTys);
4698
4699 // Copy operand bundles if any
4701 CI->getOperandBundlesAsDefs(Bundles);
4702
4703 // Create the new call and copy calling properties
4704 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4705 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4706 NewCall->setCallingConv(CI->getCallingConv());
4707 NewCall->setAttributes(CI->getAttributes());
4708 NewCall->setDebugLoc(CI->getDebugLoc());
4709 NewCall->copyMetadata(*CI);
4710 return NewCall;
4711 };
4712
4713 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4714 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4715 "intrinsic should have 7 arguments");
4716 Type *T1 = CI->getArgOperand(4)->getType();
4717 Type *T2 = CI->getArgOperand(1)->getType();
4718 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4719 }
4720 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4721 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4722 "intrinsic should have 8 arguments");
4723 Type *T1 = CI->getArgOperand(4)->getType();
4724 Type *T2 = CI->getArgOperand(1)->getType();
4725 Type *T3 = CI->getArgOperand(3)->getType();
4726 Type *T4 = CI->getArgOperand(5)->getType();
4727 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4728 }
4729
4730 switch (F->getIntrinsicID()) {
4731 default:
4732 break;
4733 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4734 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4735 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4736 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4737 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4738 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4739 // Drop src0 and src1 modifiers.
4740 const Value *Op0 = CI->getArgOperand(0);
4741 const Value *Op2 = CI->getArgOperand(2);
4742 assert(Op0->getType()->isIntegerTy() && Op2->getType()->isIntegerTy());
4743 const ConstantInt *ModA = dyn_cast<ConstantInt>(Op0);
4744 const ConstantInt *ModB = dyn_cast<ConstantInt>(Op2);
4745 if (!ModA->isZero() || !ModB->isZero())
4746 reportFatalUsageError(Name + " matrix A and B modifiers shall be zero");
4747
4749 for (int I = 4, E = CI->arg_size(); I < E; ++I)
4750 Args.push_back(CI->getArgOperand(I));
4751
4752 SmallVector<Type *, 3> Overloads{F->getReturnType(), Args[0]->getType()};
4753 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4754 Overloads.push_back(Args[3]->getType());
4756 F->getParent(), F->getIntrinsicID(), Overloads);
4757
4759 CI->getOperandBundlesAsDefs(Bundles);
4760
4761 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4762 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4763 NewCall->setCallingConv(CI->getCallingConv());
4764 NewCall->setAttributes(CI->getAttributes());
4765 NewCall->setDebugLoc(CI->getDebugLoc());
4766 NewCall->copyMetadata(*CI);
4767 NewCall->takeName(CI);
4768 return NewCall;
4769 }
4770 }
4771
4772 AtomicRMWInst::BinOp RMWOp =
4774 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4775 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4776 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4777 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4778 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4779 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4780 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4781 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4782 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4783 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4784 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4785 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4786 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4787
4788 unsigned NumOperands = CI->getNumOperands();
4789 if (NumOperands < 3) // Malformed bitcode.
4790 return nullptr;
4791
4792 Value *Ptr = CI->getArgOperand(0);
4793 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4794 if (!PtrTy) // Malformed.
4795 return nullptr;
4796
4797 Value *Val = CI->getArgOperand(1);
4798 if (Val->getType() != CI->getType()) // Malformed.
4799 return nullptr;
4800
4801 ConstantInt *OrderArg = nullptr;
4802 bool IsVolatile = false;
4803
4804 // These should have 5 arguments (plus the callee). A separate version of the
4805 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4806 if (NumOperands > 3)
4807 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4808
4809 // Ignore scope argument at 3
4810
4811 if (NumOperands > 5) {
4812 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4813 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4814 }
4815
4817 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4818 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4821
4822 LLVMContext &Ctx = F->getContext();
4823
4824 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4825 Type *RetTy = CI->getType();
4826 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4827 if (VT->getElementType()->isIntegerTy(16)) {
4828 VectorType *AsBF16 =
4829 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4830 Val = Builder.CreateBitCast(Val, AsBF16);
4831 }
4832 }
4833
4834 // The scope argument never really worked correctly. Use agent as the most
4835 // conservative option which should still always produce the instruction.
4836 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4837 AtomicRMWInst *RMW =
4838 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4839
4840 unsigned AddrSpace = PtrTy->getAddressSpace();
4841 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4842 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4843 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4844 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4845 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4846 }
4847
4848 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4849 MDBuilder MDB(F->getContext());
4850 MDNode *RangeNotPrivate =
4853 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4854 }
4855
4856 if (IsVolatile)
4857 RMW->setVolatile(true);
4858
4859 return Builder.CreateBitCast(RMW, RetTy);
4860}
4861
/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
/// plain MDNode, as it's the verifier's job to check these are the correct
/// types later.
static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
  // Out-of-range operand indices and non-MDNode metadata both yield nullptr;
  // callers let the verifier diagnose malformed records afterwards.
  if (Op < CI->arg_size()) {
    if (MetadataAsValue *MAV =
      Metadata *MD = MAV->getMetadata();
      return dyn_cast_if_present<MDNode>(MD);
    }
  }
  return nullptr;
}
4875
/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
  // Returns nullptr when the operand index is out of range.
  if (Op < CI->arg_size())
    return MAV->getMetadata();
  return nullptr;
}
4883
  // The MDNode attached to this instruction might not be the correct type,
  // as the verifier has not yet been run. Fetch it as a bare MDNode.
  return I->getDebugLoc().getAsMDNode();
}
4889
/// Convert debug intrinsic calls to non-instruction debug records.
/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
/// \p CI - The debug intrinsic call.
  // DR is populated by exactly one of the branches below and asserted
  // non-null before being inserted ahead of the call.
  DbgRecord *DR = nullptr;
  if (Name == "label") {
      CI->getDebugLoc());
  } else if (Name == "assign") {
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
        unwrapMAVMetadataOp(CI, 4),
        /*The address is a Value ref, it will be stored as a Metadata */
        unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
  } else if (Name == "declare") {
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
        getDebugLocSafe(CI));
  } else if (Name == "addr") {
    // Upgrade dbg.addr to dbg.value with DW_OP_deref.
    MDNode *ExprNode = unwrapMAVOp(CI, 2);
    // Don't try to add something to the expression if it's not an expression.
    // Instead, allow the verifier to fail later.
    if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
      ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
    }
        unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
        getDebugLocSafe(CI));
  } else if (Name == "value") {
    // An old version of dbg.value had an extra offset argument.
    unsigned VarOp = 1;
    unsigned ExprOp = 2;
    if (CI->arg_size() == 4) {
      // Nonzero offset dbg.values get dropped without a replacement.
      if (!Offset || !Offset->isNullValue())
        return;
      VarOp = 2;
      ExprOp = 3;
    }
        unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
        nullptr, getDebugLocSafe(CI));
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
}
4942
// NOTE(review): the signature and Offset initializer (original lines
// 4943-4944, presumably a static helper extracting a ConstantInt offset from
// the old llvm.vector.splice call) are elided by the doc export -- confirm
// against upstream AutoUpgrade.cpp.
// Maps the signed splice offset onto the new split intrinsics: a
// non-negative offset becomes vector_splice_left, a negative one becomes
// vector_splice_right, with the magnitude passed as an i32 immediate.
4945  if (!Offset)
4946    reportFatalUsageError("Invalid llvm.vector.splice offset argument");
4947  int64_t OffsetVal = Offset->getSExtValue();
4948  return Builder.CreateIntrinsic(OffsetVal >= 0
4949                                     ? Intrinsic::vector_splice_left
4950                                     : Intrinsic::vector_splice_right,
4951                                 CI->getType(),
4952                                 {CI->getArgOperand(0), CI->getArgOperand(1),
4953                                  Builder.getInt32(std::abs(OffsetVal))});
4954}
4955
// NOTE(review): the first half of this helper's signature (original line
// 4956, presumably `static Value *upgradeConvertIntrinsicCall(StringRef Name,
// CallBase *CI, ...)`) is elided by the doc export.
// Upgrades llvm.convert.to.fp16 / llvm.convert.from.fp16: the old intrinsics
// carried the half value as an i16, so the replacement pairs an fptrunc /
// fpext with a bitcast to preserve the caller-visible integer type.
// Returns nullptr when \p Name is not one of the two handled conversions.
4957                                          Function *F, IRBuilder<> &Builder) {
4958  if (Name.starts_with("to.fp16")) {
4959    Value *Cast =
4960        Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
4961    return Builder.CreateBitCast(Cast, CI->getType());
4962  }
4963
4964  if (Name.starts_with("from.fp16")) {
4965    Value *Cast =
4966        Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
4967    return Builder.CreateFPExt(Cast, CI->getType());
4968  }
4969
4970  return nullptr;
4971}
4972
4973/// Upgrade a call to an old intrinsic. All argument and return casting must be
4974/// provided to seamlessly integrate with existing context.
// NOTE(review): the function signature (original line 4975, presumably
// `void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn)`) and a
// number of statement lines throughout the body (e.g. 4979, 5020, 5072,
// 5190, 5194, 5240, 5252, 5365, 5570) are elided by the doc-generator
// export. Compare against upstream AutoUpgrade.cpp before relying on this
// listing.
4976  // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4977  // checks the callee's function type matches. It's likely we need to handle
4978  // type changes here.
4980  if (!F)
4981    return;
4982
4983  LLVMContext &C = CI->getContext();
4984  IRBuilder<> Builder(C);
4985  if (isa<FPMathOperator>(CI))
4986    Builder.setFastMathFlags(CI->getFastMathFlags());
4987  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4988
  // No replacement function provided: the upgrade is expressed as freshly
  // built instructions, dispatched to a per-target helper on the name prefix.
4989  if (!NewFn) {
4990    // Get the Function's name.
4991    StringRef Name = F->getName();
4992    if (!Name.consume_front("llvm."))
4993      llvm_unreachable("intrinsic doesn't start with 'llvm.'");
4994
4995    bool IsX86 = Name.consume_front("x86.");
4996    bool IsNVVM = Name.consume_front("nvvm.");
4997    bool IsAArch64 = Name.consume_front("aarch64.");
4998    bool IsARM = Name.consume_front("arm.");
4999    bool IsAMDGCN = Name.consume_front("amdgcn.");
5000    bool IsDbg = Name.consume_front("dbg.");
5001    bool IsOldSplice =
5002        (Name.consume_front("experimental.vector.splice") ||
5003         Name.consume_front("vector.splice")) &&
5004        !(Name.starts_with(".left") || Name.starts_with(".right"));
5005    Value *Rep = nullptr;
5006
5007    if (!IsX86 && Name == "stackprotectorcheck") {
5008      Rep = nullptr;
5009    } else if (IsNVVM) {
5010      Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
5011    } else if (IsX86) {
5012      Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
5013    } else if (IsAArch64) {
5014      Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
5015    } else if (IsARM) {
5016      Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
5017    } else if (IsAMDGCN) {
5018      Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
5019    } else if (IsDbg) {
5021    } else if (IsOldSplice) {
5022      Rep = upgradeVectorSplice(CI, Builder);
5023    } else if (Name.consume_front("convert.")) {
5024      Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
5025    } else {
5026      llvm_unreachable("Unknown function for CallBase upgrade.");
5027    }
5028
5029    if (Rep)
5030      CI->replaceAllUsesWith(Rep);
5031    CI->eraseFromParent();
5032    return;
5033  }
5034
  // Fallback used by switch cases below when the call does not match the
  // shape they expect: retarget the call when only the name/mangling changed,
  // handle a named-to-literal struct return change, or deliberately leave an
  // invalid call for the verifier to diagnose.
5035  const auto &DefaultCase = [&]() -> void {
5036    if (F == NewFn)
5037      return;
5038
5039    if (CI->getFunctionType() == NewFn->getFunctionType()) {
5040      // Handle generic mangling change.
5041      assert(
5042          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
5043          "Unknown function for CallBase upgrade and isn't just a name change");
5044      CI->setCalledFunction(NewFn);
5045      return;
5046    }
5047
5048    // This must be an upgrade from a named to a literal struct.
5049    if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
5050      assert(OldST != NewFn->getReturnType() &&
5051             "Return type must have changed");
5052      assert(OldST->getNumElements() ==
5053                 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
5054             "Must have same number of elements");
5055
5056      SmallVector<Value *> Args(CI->args());
5057      CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5058      NewCI->setAttributes(CI->getAttributes());
5059      Value *Res = PoisonValue::get(OldST);
5060      for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5061        Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5062        Res = Builder.CreateInsertValue(Res, Elem, Idx);
5063      }
5064      CI->replaceAllUsesWith(Res);
5065      CI->eraseFromParent();
5066      return;
5067    }
5068
5069    // We're probably about to produce something invalid. Let the verifier catch
5070    // it instead of dying here.
5071    CI->setCalledOperand(
5073    return;
5074  };
  // Per-intrinsic rebuild. Cases either set NewCall (shared RAUW/erase tail
  // below) or finish the rewrite themselves and return.
5075  CallInst *NewCall = nullptr;
5076  switch (NewFn->getIntrinsicID()) {
5077  default: {
5078    DefaultCase();
5079    return;
5080  }
5081  case Intrinsic::arm_neon_vst1:
5082  case Intrinsic::arm_neon_vst2:
5083  case Intrinsic::arm_neon_vst3:
5084  case Intrinsic::arm_neon_vst4:
5085  case Intrinsic::arm_neon_vst2lane:
5086  case Intrinsic::arm_neon_vst3lane:
5087  case Intrinsic::arm_neon_vst4lane: {
5088    SmallVector<Value *, 4> Args(CI->args());
5089    NewCall = Builder.CreateCall(NewFn, Args);
5090    break;
5091  }
5092  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5093  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5094  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
    // The lane argument widened to i32; rebuild it from the old constant.
5095    LLVMContext &Ctx = F->getParent()->getContext();
5096    SmallVector<Value *, 4> Args(CI->args());
5097    Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5098                               cast<ConstantInt>(Args[3])->getZExtValue());
5099    NewCall = Builder.CreateCall(NewFn, Args);
5100    break;
5101  }
5102  case Intrinsic::aarch64_sve_ld3_sret:
5103  case Intrinsic::aarch64_sve_ld4_sret:
5104  case Intrinsic::aarch64_sve_ld2_sret: {
    // Old form returned one wide scalable vector; the sret form returns a
    // struct, so reassemble the wide vector via insertvector.
5105    StringRef Name = F->getName();
5106    Name = Name.substr(5);
5107    unsigned N = StringSwitch<unsigned>(Name)
5108                     .StartsWith("aarch64.sve.ld2", 2)
5109                     .StartsWith("aarch64.sve.ld3", 3)
5110                     .StartsWith("aarch64.sve.ld4", 4)
5111                     .Default(0);
5112    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5113    unsigned MinElts = RetTy->getMinNumElements() / N;
5114    SmallVector<Value *, 2> Args(CI->args());
5115    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5116    Value *Ret = llvm::PoisonValue::get(RetTy);
5117    for (unsigned I = 0; I < N; I++) {
5118      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5119      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5120    }
5121    NewCall = dyn_cast<CallInst>(Ret);
5122    break;
5123  }
5124
5125  case Intrinsic::coro_end: {
    // New coro.end takes an extra token argument; default it to none.
5126    SmallVector<Value *, 3> Args(CI->args());
5127    Args.push_back(ConstantTokenNone::get(CI->getContext()));
5128    NewCall = Builder.CreateCall(NewFn, Args);
5129    break;
5130  }
5131
5132  case Intrinsic::vector_extract: {
    // Only aarch64.sve.tuple.get maps here; translate the tuple index into a
    // vector-element index for vector.extract.
5133    StringRef Name = F->getName();
5134    Name = Name.substr(5); // Strip llvm
5135    if (!Name.starts_with("aarch64.sve.tuple.get")) {
5136      DefaultCase();
5137      return;
5138    }
5139    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5140    unsigned MinElts = RetTy->getMinNumElements();
5141    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5142    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5143    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5144    break;
5145  }
5146
5147  case Intrinsic::vector_insert: {
    // aarch64.sve.tuple.set / tuple.createN both lower to vector.insert.
5148    StringRef Name = F->getName();
5149    Name = Name.substr(5);
5150    if (!Name.starts_with("aarch64.sve.tuple")) {
5151      DefaultCase();
5152      return;
5153    }
5154    if (Name.starts_with("aarch64.sve.tuple.set")) {
5155      unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5156      auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5157      Value *NewIdx =
5158          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5159      NewCall = Builder.CreateCall(
5160          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5161      break;
5162    }
5163    if (Name.starts_with("aarch64.sve.tuple.create")) {
5164      unsigned N = StringSwitch<unsigned>(Name)
5165                       .StartsWith("aarch64.sve.tuple.create2", 2)
5166                       .StartsWith("aarch64.sve.tuple.create3", 3)
5167                       .StartsWith("aarch64.sve.tuple.create4", 4)
5168                       .Default(0);
5169      assert(N > 1 && "Create is expected to be between 2-4");
5170      auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5171      Value *Ret = llvm::PoisonValue::get(RetTy);
5172      unsigned MinElts = RetTy->getMinNumElements() / N;
5173      for (unsigned I = 0; I < N; I++) {
5174        Value *V = CI->getArgOperand(I);
5175        Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5176      }
5177      NewCall = dyn_cast<CallInst>(Ret);
5178    }
5179    break;
5180  }
5181
5182  case Intrinsic::arm_neon_bfdot:
5183  case Intrinsic::arm_neon_bfmmla:
5184  case Intrinsic::arm_neon_bfmlalb:
5185  case Intrinsic::arm_neon_bfmlalt:
5186  case Intrinsic::aarch64_neon_bfdot:
5187  case Intrinsic::aarch64_neon_bfmmla:
5188  case Intrinsic::aarch64_neon_bfmlalb:
5189  case Intrinsic::aarch64_neon_bfmlalt: {
    // BFloat operands used to be modelled as integer vectors; bitcast the
    // two data operands to the bfloat vector type of matching width.
    // (The Args declaration on original line 5190 is elided by the export.)
5191    assert(CI->arg_size() == 3 &&
5192           "Mismatch between function args and call args");
5193    size_t OperandWidth =
5195    assert((OperandWidth == 64 || OperandWidth == 128) &&
5196           "Unexpected operand width");
5197    Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5198    auto Iter = CI->args().begin();
5199    Args.push_back(*Iter++);
5200    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5201    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5202    NewCall = Builder.CreateCall(NewFn, Args);
5203    break;
5204  }
5205
5206  case Intrinsic::bitreverse:
5207    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5208    break;
5209
5210  case Intrinsic::ctlz:
5211  case Intrinsic::cttz: {
    // Old single-argument form gains the is_zero_poison flag (false).
5212    if (CI->arg_size() != 1) {
5213      DefaultCase();
5214      return;
5215    }
5216
5217    NewCall =
5218        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5219    break;
5220  }
5221
5222  case Intrinsic::objectsize: {
    // Fill in the later-added nullunknown/dynamic flags with false defaults.
5223    Value *NullIsUnknownSize =
5224        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5225    Value *Dynamic =
5226        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5227    NewCall = Builder.CreateCall(
5228        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5229    break;
5230  }
5231
5232  case Intrinsic::ctpop:
5233    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5234    break;
5235  case Intrinsic::dbg_value: {
5236    StringRef Name = F->getName();
5237    Name = Name.substr(5); // Strip llvm.
5238    // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5239    if (Name.starts_with("dbg.addr")) {
      // (The DIExpression binding on original line 5240 is elided by the
      // export; the appended deref reflects the implicit indirection.)
5241          cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5242      Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5243      NewCall =
5244          Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5245                                     MetadataAsValue::get(C, Expr)});
5246      break;
5247    }
5248
5249    // Upgrade from the old version that had an extra offset argument.
5250    assert(CI->arg_size() == 4);
5251    // Drop nonzero offsets instead of attempting to upgrade them.
5253    if (Offset->isNullValue()) {
5254      NewCall = Builder.CreateCall(
5255          NewFn,
5256          {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5257      break;
5258    }
5259    CI->eraseFromParent();
5260    return;
5261  }
5262
5263  case Intrinsic::ptr_annotation:
5264    // Upgrade from versions that lacked the annotation attribute argument.
5265    if (CI->arg_size() != 4) {
5266      DefaultCase();
5267      return;
5268    }
5269
5270    // Create a new call with an added null annotation attribute argument.
5271    NewCall = Builder.CreateCall(
5272        NewFn,
5273        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5274         CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5275    NewCall->takeName(CI);
5276    CI->replaceAllUsesWith(NewCall);
5277    CI->eraseFromParent();
5278    return;
5279
5280  case Intrinsic::var_annotation:
5281    // Upgrade from versions that lacked the annotation attribute argument.
5282    if (CI->arg_size() != 4) {
5283      DefaultCase();
5284      return;
5285    }
5286    // Create a new call with an added null annotation attribute argument.
5287    NewCall = Builder.CreateCall(
5288        NewFn,
5289        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5290         CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5291    NewCall->takeName(CI);
5292    CI->replaceAllUsesWith(NewCall);
5293    CI->eraseFromParent();
5294    return;
5295
5296  case Intrinsic::riscv_aes32dsi:
5297  case Intrinsic::riscv_aes32dsmi:
5298  case Intrinsic::riscv_aes32esi:
5299  case Intrinsic::riscv_aes32esmi:
5300  case Intrinsic::riscv_sm4ks:
5301  case Intrinsic::riscv_sm4ed: {
5302    // The last argument to these intrinsics used to be i8 and changed to i32.
5303    // The type overload for sm4ks and sm4ed was removed.
5304    Value *Arg2 = CI->getArgOperand(2);
5305    if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5306      return;
5307
5308    Value *Arg0 = CI->getArgOperand(0);
5309    Value *Arg1 = CI->getArgOperand(1);
5310    if (CI->getType()->isIntegerTy(64)) {
5311      Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5312      Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5313    }
5314
5315    Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5316                            cast<ConstantInt>(Arg2)->getZExtValue());
5317
5318    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5319    Value *Res = NewCall;
5320    if (Res->getType() != CI->getType())
5321      Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5322    NewCall->takeName(CI);
5323    CI->replaceAllUsesWith(Res);
5324    CI->eraseFromParent();
5325    return;
5326  }
5327  case Intrinsic::nvvm_mapa_shared_cluster: {
5328    // Create a new call with the correct address space.
5329    NewCall =
5330        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5331    Value *Res = NewCall;
5332    Res = Builder.CreateAddrSpaceCast(
5333        Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5334    NewCall->takeName(CI);
5335    CI->replaceAllUsesWith(Res);
5336    CI->eraseFromParent();
5337    return;
5338  }
5339  case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5340  case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5341    // Create a new call with the correct address space.
5342    SmallVector<Value *, 4> Args(CI->args());
5343    Args[0] = Builder.CreateAddrSpaceCast(
5344        Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5345
5346    NewCall = Builder.CreateCall(NewFn, Args);
5347    NewCall->takeName(CI);
5348    CI->replaceAllUsesWith(NewCall);
5349    CI->eraseFromParent();
5350    return;
5351  }
5352  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5353  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5354  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5355  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5356  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5357  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5358  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5359  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5360    SmallVector<Value *, 16> Args(CI->args());
5361
5362    // Create AddrSpaceCast to shared_cluster if needed.
5363    // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5364    unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
    // (The conditional guarding this cast, original line 5365, is elided by
    // the export.)
5366      Args[0] = Builder.CreateAddrSpaceCast(
5367          Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5368
5369    // Attach the flag argument for cta_group, with a
5370    // default value of 0. This handles case (2) in
5371    // shouldUpgradeNVPTXTMAG2SIntrinsics().
5372    size_t NumArgs = CI->arg_size();
5373    Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5374    if (!FlagArg->getType()->isIntegerTy(1))
5375      Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5376
5377    NewCall = Builder.CreateCall(NewFn, Args);
5378    NewCall->takeName(CI);
5379    CI->replaceAllUsesWith(NewCall);
5380    CI->eraseFromParent();
5381    return;
5382  }
5383  case Intrinsic::riscv_sha256sig0:
5384  case Intrinsic::riscv_sha256sig1:
5385  case Intrinsic::riscv_sha256sum0:
5386  case Intrinsic::riscv_sha256sum1:
5387  case Intrinsic::riscv_sm3p0:
5388  case Intrinsic::riscv_sm3p1: {
5389    // The last argument to these intrinsics used to be i8 and changed to i32.
5390    // The type overload for sm4ks and sm4ed was removed.
5391    if (!CI->getType()->isIntegerTy(64))
5392      return;
5393
5394    Value *Arg =
5395        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5396
5397    NewCall = Builder.CreateCall(NewFn, Arg);
5398    Value *Res =
5399        Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5400    NewCall->takeName(CI);
5401    CI->replaceAllUsesWith(Res);
5402    CI->eraseFromParent();
5403    return;
5404  }
5405
5406  case Intrinsic::x86_xop_vfrcz_ss:
5407  case Intrinsic::x86_xop_vfrcz_sd:
5408    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5409    break;
5410
5411  case Intrinsic::x86_xop_vpermil2pd:
5412  case Intrinsic::x86_xop_vpermil2ps:
5413  case Intrinsic::x86_xop_vpermil2pd_256:
5414  case Intrinsic::x86_xop_vpermil2ps_256: {
    // The selector operand changed from a float vector to an integer vector.
5415    SmallVector<Value *, 4> Args(CI->args());
5416    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5417    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5418    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5419    NewCall = Builder.CreateCall(NewFn, Args);
5420    break;
5421  }
5422
5423  case Intrinsic::x86_sse41_ptestc:
5424  case Intrinsic::x86_sse41_ptestz:
5425  case Intrinsic::x86_sse41_ptestnzc: {
5426    // The arguments for these intrinsics used to be v4f32, and changed
5427    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5428    // So, the only thing required is a bitcast for both arguments.
5429    // First, check the arguments have the old type.
5430    Value *Arg0 = CI->getArgOperand(0);
5431    if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5432      return;
5433
5434    // Old intrinsic, add bitcasts
5435    Value *Arg1 = CI->getArgOperand(1);
5436
5437    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5438
5439    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5440    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5441
5442    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5443    break;
5444  }
5445
5446  case Intrinsic::x86_rdtscp: {
5447    // This used to take 1 arguments. If we have no arguments, it is already
5448    // upgraded.
5449    if (CI->getNumOperands() == 0)
5450      return;
5451
5452    NewCall = Builder.CreateCall(NewFn);
5453    // Extract the second result and store it.
5454    Value *Data = Builder.CreateExtractValue(NewCall, 1);
5455    Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5456    // Replace the original call result with the first result of the new call.
5457    Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5458
5459    NewCall->takeName(CI);
5460    CI->replaceAllUsesWith(TSC);
5461    CI->eraseFromParent();
5462    return;
5463  }
5464
5465  case Intrinsic::x86_sse41_insertps:
5466  case Intrinsic::x86_sse41_dppd:
5467  case Intrinsic::x86_sse41_dpps:
5468  case Intrinsic::x86_sse41_mpsadbw:
5469  case Intrinsic::x86_avx_dp_ps_256:
5470  case Intrinsic::x86_avx2_mpsadbw: {
5471    // Need to truncate the last argument from i32 to i8 -- this argument models
5472    // an inherently 8-bit immediate operand to these x86 instructions.
5473    SmallVector<Value *, 4> Args(CI->args());
5474
5475    // Replace the last argument with a trunc.
5476    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5477    NewCall = Builder.CreateCall(NewFn, Args);
5478    break;
5479  }
5480
5481  case Intrinsic::x86_avx512_mask_cmp_pd_128:
5482  case Intrinsic::x86_avx512_mask_cmp_pd_256:
5483  case Intrinsic::x86_avx512_mask_cmp_pd_512:
5484  case Intrinsic::x86_avx512_mask_cmp_ps_128:
5485  case Intrinsic::x86_avx512_mask_cmp_ps_256:
5486  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    // The integer mask operand becomes a <N x i1> vector; the scalar result
    // is recreated from the i1 vector afterwards.
5487    SmallVector<Value *, 4> Args(CI->args());
5488    unsigned NumElts =
5489        cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5490    Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5491
5492    NewCall = Builder.CreateCall(NewFn, Args);
5493    Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5494
5495    NewCall->takeName(CI);
5496    CI->replaceAllUsesWith(Res);
5497    CI->eraseFromParent();
5498    return;
5499  }
5500
5501  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5502  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5503  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5504  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5505  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5506  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
    // bf16 results were previously modelled as i16 vectors; bitcast at the
    // boundary in both directions.
5507    SmallVector<Value *, 4> Args(CI->args());
5508    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5509    if (NewFn->getIntrinsicID() ==
5510        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5511      Args[1] = Builder.CreateBitCast(
5512          Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5513
5514    NewCall = Builder.CreateCall(NewFn, Args);
5515    Value *Res = Builder.CreateBitCast(
5516        NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5517
5518    NewCall->takeName(CI);
5519    CI->replaceAllUsesWith(Res);
5520    CI->eraseFromParent();
5521    return;
5522  }
5523  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5524  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5525  case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
    // Source operands were i32 vectors packing two bf16 each; bitcast them
    // to twice-as-long bf16 vectors.
5526    SmallVector<Value *, 4> Args(CI->args());
5527    unsigned NumElts =
5528        cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5529    Args[1] = Builder.CreateBitCast(
5530        Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5531    Args[2] = Builder.CreateBitCast(
5532        Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5533
5534    NewCall = Builder.CreateCall(NewFn, Args);
5535    break;
5536  }
5537
5538  case Intrinsic::thread_pointer: {
5539    NewCall = Builder.CreateCall(NewFn, {});
5540    break;
5541  }
5542
5543  case Intrinsic::memcpy:
5544  case Intrinsic::memmove:
5545  case Intrinsic::memset: {
5546    // We have to make sure that the call signature is what we're expecting.
5547    // We only want to change the old signatures by removing the alignment arg:
5548    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5549    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5550    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5551    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
5552    // Note: i8*'s in the above can be any pointer type
5553    if (CI->arg_size() != 5) {
5554      DefaultCase();
5555      return;
5556    }
5557    // Remove alignment argument (3), and add alignment attributes to the
5558    // dest/src pointers.
5559    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5560                      CI->getArgOperand(2), CI->getArgOperand(4)};
5561    NewCall = Builder.CreateCall(NewFn, Args);
5562    AttributeList OldAttrs = CI->getAttributes();
5563    AttributeList NewAttrs = AttributeList::get(
5564        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5565        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5566         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5567    NewCall->setAttributes(NewAttrs);
5568    auto *MemCI = cast<MemIntrinsic>(NewCall);
5569    // All mem intrinsics support dest alignment.
    // (The ConstantInt *Align binding, original line 5570, is elided by the
    // export.)
5571    MemCI->setDestAlignment(Align->getMaybeAlignValue());
5572    // Memcpy/Memmove also support source alignment.
5573    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5574      MTI->setSourceAlignment(Align->getMaybeAlignValue());
5575    break;
5576  }
5577
5578  case Intrinsic::masked_load:
5579  case Intrinsic::masked_gather:
5580  case Intrinsic::masked_store:
5581  case Intrinsic::masked_scatter: {
    // Alignment moved from an explicit i32 argument into an attribute;
    // rebuild via the IRBuilder helpers, which attach the alignment.
5582    if (CI->arg_size() != 4) {
5583      DefaultCase();
5584      return;
5585    }
5586
5587    auto GetMaybeAlign = [](Value *Op) {
5588      if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5589        uint64_t Val = CI->getZExtValue();
5590        if (Val == 0)
5591          return MaybeAlign();
5592        if (isPowerOf2_64(Val))
5593          return MaybeAlign(Val);
5594      }
5595      reportFatalUsageError("Invalid alignment argument");
5596    };
5597    auto GetAlign = [&](Value *Op) {
5598      MaybeAlign Align = GetMaybeAlign(Op);
5599      if (Align)
5600        return *Align;
5601      reportFatalUsageError("Invalid zero alignment argument");
5602    };
5603
5604    const DataLayout &DL = CI->getDataLayout();
5605    switch (NewFn->getIntrinsicID()) {
5606    case Intrinsic::masked_load:
5607      NewCall = Builder.CreateMaskedLoad(
5608          CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5609          CI->getArgOperand(2), CI->getArgOperand(3));
5610      break;
5611    case Intrinsic::masked_gather:
5612      NewCall = Builder.CreateMaskedGather(
5613          CI->getType(), CI->getArgOperand(0),
5614          DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5615                                        CI->getType()->getScalarType()),
5616          CI->getArgOperand(2), CI->getArgOperand(3));
5617      break;
5618    case Intrinsic::masked_store:
5619      NewCall = Builder.CreateMaskedStore(
5620          CI->getArgOperand(0), CI->getArgOperand(1),
5621          GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5622      break;
5623    case Intrinsic::masked_scatter:
5624      NewCall = Builder.CreateMaskedScatter(
5625          CI->getArgOperand(0), CI->getArgOperand(1),
5626          DL.getValueOrABITypeAlignment(
5627              GetMaybeAlign(CI->getArgOperand(2)),
5628              CI->getArgOperand(0)->getType()->getScalarType()),
5629          CI->getArgOperand(3));
5630      break;
5631    default:
5632      llvm_unreachable("Unexpected intrinsic ID");
5633    }
5634    // Previous metadata is still valid.
5635    NewCall->copyMetadata(*CI);
5636    NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5637    break;
5638  }
5639
5640  case Intrinsic::lifetime_start:
5641  case Intrinsic::lifetime_end: {
    // Lifetime markers lost their size argument and now require an alloca;
    // markers on anything else are simply dropped.
5642    if (CI->arg_size() != 2) {
5643      DefaultCase();
5644      return;
5645    }
5646
5647    Value *Ptr = CI->getArgOperand(1);
5648    // Try to strip pointer casts, such that the lifetime works on an alloca.
5649    Ptr = Ptr->stripPointerCasts();
5650    if (isa<AllocaInst>(Ptr)) {
5651      // Don't use NewFn, as we might have looked through an addrspacecast.
5652      if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5653        NewCall = Builder.CreateLifetimeStart(Ptr);
5654      else
5655        NewCall = Builder.CreateLifetimeEnd(Ptr);
5656      break;
5657    }
5658
5659    // Otherwise remove the lifetime marker.
5660    CI->eraseFromParent();
5661    return;
5662  }
5663
5664  case Intrinsic::x86_avx512_vpdpbusd_128:
5665  case Intrinsic::x86_avx512_vpdpbusd_256:
5666  case Intrinsic::x86_avx512_vpdpbusd_512:
5667  case Intrinsic::x86_avx512_vpdpbusds_128:
5668  case Intrinsic::x86_avx512_vpdpbusds_256:
5669  case Intrinsic::x86_avx512_vpdpbusds_512:
5670  case Intrinsic::x86_avx2_vpdpbssd_128:
5671  case Intrinsic::x86_avx2_vpdpbssd_256:
5672  case Intrinsic::x86_avx10_vpdpbssd_512:
5673  case Intrinsic::x86_avx2_vpdpbssds_128:
5674  case Intrinsic::x86_avx2_vpdpbssds_256:
5675  case Intrinsic::x86_avx10_vpdpbssds_512:
5676  case Intrinsic::x86_avx2_vpdpbsud_128:
5677  case Intrinsic::x86_avx2_vpdpbsud_256:
5678  case Intrinsic::x86_avx10_vpdpbsud_512:
5679  case Intrinsic::x86_avx2_vpdpbsuds_128:
5680  case Intrinsic::x86_avx2_vpdpbsuds_256:
5681  case Intrinsic::x86_avx10_vpdpbsuds_512:
5682  case Intrinsic::x86_avx2_vpdpbuud_128:
5683  case Intrinsic::x86_avx2_vpdpbuud_256:
5684  case Intrinsic::x86_avx10_vpdpbuud_512:
5685  case Intrinsic::x86_avx2_vpdpbuuds_128:
5686  case Intrinsic::x86_avx2_vpdpbuuds_256:
5687  case Intrinsic::x86_avx10_vpdpbuuds_512: {
    // Byte dot-product operands were i32 vectors; bitcast them to i8 vectors
    // of the same total bit width.
5688    unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5689    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5690                     CI->getArgOperand(2)};
5691    Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5692    Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5693    Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5694
5695    NewCall = Builder.CreateCall(NewFn, Args);
5696    break;
5697  }
5698  case Intrinsic::x86_avx512_vpdpwssd_128:
5699  case Intrinsic::x86_avx512_vpdpwssd_256:
5700  case Intrinsic::x86_avx512_vpdpwssd_512:
5701  case Intrinsic::x86_avx512_vpdpwssds_128:
5702  case Intrinsic::x86_avx512_vpdpwssds_256:
5703  case Intrinsic::x86_avx512_vpdpwssds_512:
5704  case Intrinsic::x86_avx2_vpdpwsud_128:
5705  case Intrinsic::x86_avx2_vpdpwsud_256:
5706  case Intrinsic::x86_avx10_vpdpwsud_512:
5707  case Intrinsic::x86_avx2_vpdpwsuds_128:
5708  case Intrinsic::x86_avx2_vpdpwsuds_256:
5709  case Intrinsic::x86_avx10_vpdpwsuds_512:
5710  case Intrinsic::x86_avx2_vpdpwusd_128:
5711  case Intrinsic::x86_avx2_vpdpwusd_256:
5712  case Intrinsic::x86_avx10_vpdpwusd_512:
5713  case Intrinsic::x86_avx2_vpdpwusds_128:
5714  case Intrinsic::x86_avx2_vpdpwusds_256:
5715  case Intrinsic::x86_avx10_vpdpwusds_512:
5716  case Intrinsic::x86_avx2_vpdpwuud_128:
5717  case Intrinsic::x86_avx2_vpdpwuud_256:
5718  case Intrinsic::x86_avx10_vpdpwuud_512:
5719  case Intrinsic::x86_avx2_vpdpwuuds_128:
5720  case Intrinsic::x86_avx2_vpdpwuuds_256:
5721  case Intrinsic::x86_avx10_vpdpwuuds_512:
    // Word dot-product analogue of the byte case above: bitcast operands to
    // i16 vectors. (The case-opening brace appears elided by the export; the
    // closing brace survives on original line 5731.)
5722    unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5723    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5724                     CI->getArgOperand(2)};
5725    Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5726    Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5727    Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5728
5729    NewCall = Builder.CreateCall(NewFn, Args);
5730    break;
5731  }
  // Shared tail for all cases that set NewCall: move the name over, replace
  // uses, and delete the old call.
5732  assert(NewCall && "Should have either set this variable or returned through "
5733                    "the default case");
5734  NewCall->takeName(CI);
5735  CI->replaceAllUsesWith(NewCall);
5736  CI->eraseFromParent();
5737}
5738
// NOTE(review): the signature line (original line 5739, presumably
// `void llvm::UpgradeCallsToIntrinsic(Function *F)`) is elided by the doc
// export -- confirm against upstream AutoUpgrade.cpp.
// Upgrades every call site of \p F when the intrinsic has a replacement,
// then removes the obsolete declaration from the module.
5740  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5741
5742  // Check if this function should be upgraded and get the replacement function
5743  // if there is one.
5744  Function *NewFn;
5745  if (UpgradeIntrinsicFunction(F, NewFn)) {
5746    // Replace all users of the old function with the new function or new
5747    // instructions. This is not a range loop because the call is deleted.
5748    for (User *U : make_early_inc_range(F->users()))
5749      if (CallBase *CB = dyn_cast<CallBase>(U))
5750        UpgradeIntrinsicCall(CB, NewFn);
5751
5752    // Remove old function, no longer used, from the module.
5753    if (F != NewFn)
5754      F->eraseFromParent();
5755  }
5756}
5757
// NOTE(review): the signature (original line 5758, presumably
// `MDNode *llvm::UpgradeTBAANode(MDNode &MD)`) and parts of the Elts2/Elts
// initializers (original lines 5773-5774 and 5779) are elided by the doc
// export -- confirm against upstream AutoUpgrade.cpp.
// Converts scalar-format TBAA tags into the struct-path aware format by
// wrapping the old node in <base, access, offset 0> (plus the immutability
// flag for the 3-operand scalar form).
5759  const unsigned NumOperands = MD.getNumOperands();
5760  if (NumOperands == 0)
5761    return &MD; // Invalid, punt to a verifier error.
5762
5763  // Check if the tag uses struct-path aware TBAA format.
5764  if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5765    return &MD;
5766
5767  auto &Context = MD.getContext();
5768  if (NumOperands == 3) {
5769    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5770    MDNode *ScalarType = MDNode::get(Context, Elts);
5771    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5772    Metadata *Elts2[] = {ScalarType, ScalarType,
5775                         MD.getOperand(2)};
5776    return MDNode::get(Context, Elts2);
5777  }
5778  // Create a MDNode <MD, MD, offset 0>
5780                                                    Type::getInt64Ty(Context)))};
5781  return MDNode::get(Context, Elts);
5782}
5783
// NOTE(review): the first half of this function's signature (original line
// 5785, presumably `Instruction *llvm::UpgradeBitCastInst(unsigned Opc,
// Value *V, Type *DestTy, ...)`) is elided by the doc export.
// Upgrades a bitcast between pointers of different address spaces (illegal
// in current IR) into a ptrtoint/inttoptr pair; \p Temp receives the
// intermediate ptrtoint instruction so the caller can insert it.
5786                                      Instruction *&Temp) {
5787  if (Opc != Instruction::BitCast)
5788    return nullptr;
5789
5790  Temp = nullptr;
5791  Type *SrcTy = V->getType();
5792  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5793      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5794    LLVMContext &Context = V->getContext();
5795
5796    // We have no information about target data layout, so we assume that
5797    // the maximum pointer size is 64bit.
5798    Type *MidTy = Type::getInt64Ty(Context);
5799    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5800
5801    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5802  }
5803
5804  return nullptr;
5805}
5805
// NOTE(review): the signature (original line 5806, presumably
// `Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C,
// Type *DestTy)`) and the start of the cast-expression on original line
// 5819 are elided by the doc export -- confirm against upstream.
// Constant-expression analogue of UpgradeBitCastInst above: an
// address-space-changing bitcast is rewritten through an i64 intermediate.
5807  if (Opc != Instruction::BitCast)
5808    return nullptr;
5809
5810  Type *SrcTy = C->getType();
5811  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5812      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5813    LLVMContext &Context = C->getContext();
5814
5815    // We have no information about target data layout, so we assume that
5816    // the maximum pointer size is 64bit.
5817    Type *MidTy = Type::getInt64Ty(Context);
5818
5820                                     DestTy);
5821  }
5822
5823  return nullptr;
5824}
5825
/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
/// NOTE(review): the signature and the DisableAutoUpgradeDebugInfo guard
/// lines are missing from this view.
    return false;

  llvm::TimeTraceScope timeScope("Upgrade debug info");
  // We need to get metadata before the module is verified (i.e., getModuleFlag
  // makes assumptions that we haven't verified yet). Carefully extract the flag
  // from the metadata.
  unsigned Version = 0;
  if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
    // Locate the "Debug Info Version" module flag by hand instead of using
    // getModuleFlag, which assumes well-formed metadata.
    auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
      if (Flag->getNumOperands() < 3)
        return false;
      if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
        return K->getString() == "Debug Info Version";
      return false;
    });
    if (OpIt != ModFlags->op_end()) {
      const MDOperand &ValOp = (*OpIt)->getOperand(2);
      if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
        Version = CI->getZExtValue();
    }
  }

  // NOTE(review): the version-comparison line guarding this scope is missing
  // from this view.
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      // NOTE(review): the diagnostic-object construction line is missing from
      // this view.
      M.getContext().diagnose(Diag);
    }
  }
  bool Modified = StripDebugInfo(M);
  // NOTE(review): the condition guarding the version-mismatch diagnostic is
  // missing from this view.
    // Diagnose a version mismatch.
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}
5873
5874static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5875 GlobalValue *GV, const Metadata *V) {
5876 Function *F = cast<Function>(GV);
5877
5878 constexpr StringLiteral DefaultValue = "1";
5879 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5880 unsigned Length = 0;
5881
5882 if (F->hasFnAttribute(Attr)) {
5883 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5884 // parse these elements placing them into Vect3
5885 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5886 for (; Length < 3 && !S.empty(); Length++) {
5887 auto [Part, Rest] = S.split(',');
5888 Vect3[Length] = Part.trim();
5889 S = Rest;
5890 }
5891 }
5892
5893 const unsigned Dim = DimC - 'x';
5894 assert(Dim < 3 && "Unexpected dim char");
5895
5896 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5897
5898 // local variable required for StringRef in Vect3 to point to.
5899 const std::string VStr = llvm::utostr(VInt);
5900 Vect3[Dim] = VStr;
5901 Length = std::max(Length, Dim + 1);
5902
5903 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5904 F->addFnAttr(Attr, NewAttr);
5905}
5906
5907static inline bool isXYZ(StringRef S) {
5908 return S == "x" || S == "y" || S == "z";
5909}
5910
                                        const Metadata *V) {
  // Translate one legacy !nvvm.annotations key/value pair for \p GV into a
  // calling convention or function/parameter attribute. Returns true when the
  // annotation was consumed (and so should be dropped from the metadata).
  // NOTE(review): the first signature line is missing from this view —
  // presumably `static bool upgradeSingleNVVMAnnotation(GlobalValue *GV,
  // StringRef K, ...)`.
  if (K == "kernel") {
    // NOTE(review): one line is missing from this view before the
    // setCallingConv call.
    cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
    return true;
  }
  if (K == "align") {
    // V is a bitfield specifying two 16-bit values. The alignment value is
    // specified in low 16-bits, The index is specified in the high bits. For the
    // index, 0 indicates the return value while higher values correspond to
    // each parameter (idx = param + 1).
    const uint64_t AlignIdxValuePair =
        mdconst::extract<ConstantInt>(V)->getZExtValue();
    const unsigned Idx = (AlignIdxValuePair >> 16);
    const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
    cast<Function>(GV)->addAttributeAtIndex(
        Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
    return true;
  }
  if (K == "maxclusterrank" || K == "cluster_max_blocks") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    // NOTE(review): the addFnAttr call consuming CV is missing from this view.
    return true;
  }
  if (K == "minctasm") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr(NVVMAttr::MinCTASm, llvm::utostr(CV));
    return true;
  }
  if (K == "maxnreg") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr(NVVMAttr::MaxNReg, llvm::utostr(CV));
    return true;
  }
  // The dimension suffix ('x'/'y'/'z') selects which component of the vector
  // attribute to update; see upgradeNVVMFnVectorAttr.
  if (K.consume_front("maxntid") && isXYZ(K)) {
    // NOTE(review): the upgradeNVVMFnVectorAttr call is missing from this view.
    return true;
  }
  if (K.consume_front("reqntid") && isXYZ(K)) {
    // NOTE(review): the upgradeNVVMFnVectorAttr call is missing from this view.
    return true;
  }
  if (K.consume_front("cluster_dim_") && isXYZ(K)) {
    // NOTE(review): the upgradeNVVMFnVectorAttr call is missing from this view.
    return true;
  }
  if (K == "grid_constant") {
    const auto Attr = Attribute::get(GV->getContext(), NVVMAttr::GridConstant);
    for (const auto &Op : cast<MDNode>(V)->operands()) {
      // For some reason, the index is 1-based in the metadata. Good thing we're
      // able to auto-upgrade it!
      const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
      cast<Function>(GV)->addParamAttr(Index, Attr);
    }
    return true;
  }

  // Unknown annotation: leave it in place.
  return false;
}
5971
  // Walk !nvvm.annotations, upgrading each recognized key/value pair to an
  // attribute (via upgradeSingleNVVMAnnotation) and rebuilding the named
  // metadata with only the pairs that were not consumed.
  // NOTE(review): the enclosing function signature is missing from this view.
  NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
  if (!NamedMD)
    return;

  SmallVector<MDNode *, 8> NewNodes;
  // NOTE(review): the declaration of SeenNodes (a set used to drop duplicate
  // annotation nodes) is missing from this view.
  for (MDNode *MD : NamedMD->operands()) {
    if (!SeenNodes.insert(MD).second)
      continue;

    // Operand 0 is the annotated global; skip nodes whose target is gone.
    auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
    if (!GV)
      continue;

    assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");

    SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
    // Each nvvm.annotations metadata entry will be of the following form:
    //   !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
    // start index = 1, to skip the global variable key
    // increment = 2, to skip the value for each property-value pairs
    for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
      MDString *K = cast<MDString>(MD->getOperand(j));
      const MDOperand &V = MD->getOperand(j + 1);
      bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
      if (!Upgraded)
        NewOperands.append({K, V});
    }

    // Keep the node only if some un-upgraded pairs remain (size 1 means only
    // the global operand is left).
    if (NewOperands.size() > 1)
      NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
  }

  // Replace the old operand list wholesale with the rebuilt nodes.
  NamedMD->clearOperands();
  for (MDNode *N : NewNodes)
    NamedMD->addOperand(N);
}
6010
/// This checks for objc retain release marker which should be upgraded. It
/// returns true if module is modified.
/// NOTE(review): the function signature is missing from this view —
/// presumably `static bool UpgradeRetainReleaseMarker(Module &M)`.
  bool Changed = false;
  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
  if (ModRetainReleaseMarker) {
    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
    if (Op) {
      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
      if (ID) {
        SmallVector<StringRef, 4> ValueComp;
        ID->getString().split(ValueComp, "#");
        if (ValueComp.size() == 2) {
          // Upgrade the old "A#B" marker spelling to "A;B".
          std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
          ID = MDString::get(M.getContext(), NewValue);
        }
        // Move the marker from named metadata to a module flag and delete
        // the obsolete named-metadata node.
        M.addModuleFlag(Module::Error, MarkerKey, ID);
        M.eraseNamedMetadata(ModRetainReleaseMarker);
        Changed = true;
      }
    }
  }
  return Changed;
}
6036
  // Upgrade direct calls to ObjC ARC runtime functions into calls to the
  // corresponding llvm.objc.* intrinsics.
  // NOTE(review): the enclosing function signature is missing from this view.
  //
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);

    if (!Fn)
      return;

    Function *NewFn =
        llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);

    // early_inc_range: calls are erased while iterating the user list.
    for (User *U : make_early_inc_range(Fn->users())) {
      // NOTE(review): the dyn_cast of U producing CI is missing from this view.
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      // NOTE(review): the declaration of Args is missing from this view.

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
          // to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }

      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->takeName(CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }

    // Drop the runtime-function declaration once nothing refers to it.
    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  // NOTE(review): the UpgradeRetainReleaseMarker(M) condition line is missing
  // from this view.
    return;

  // Table of ARC runtime entry points and their intrinsic replacements.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}
6158
  // Upgrade legacy module-flag metadata in place. Returns true if the module
  // was modified.
  // NOTE(review): the enclosing function signature is missing from this view.
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  uint32_t SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(M.getContext());
  auto Int32Ty = Type::getInt32Ty(M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    // A well-formed module flag is !{behavior, !"key", value}.
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    // Rewrite the current flag with a new merge behavior, keeping key/value.
    auto SetBehavior = [&](Module::ModFlagBehavior B) {
      Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
                              Type::getInt32Ty(M.getContext()), B)),
                          MDString::get(M.getContext(), ID->getString()),
                          Op->getOperand(2)};
      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
      Changed = true;
    };

    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC from Error/Max to Min.
    if (ID->getString() == "PIC Level") {
      if (auto *Behavior =
          // NOTE(review): the mdconst extraction of the behavior constant is
          // missing from this view.
        uint64_t V = Behavior->getLimitedValue();
        if (V == Module::Error || V == Module::Max)
          SetBehavior(Module::Min);
      }
    }
    // Upgrade "PIE Level" from Error to Max.
    if (ID->getString() == "PIE Level")
      if (auto *Behavior =
          // NOTE(review): the mdconst extraction of the behavior constant is
          // missing from this view.
        if (Behavior->getLimitedValue() == Module::Error)
          SetBehavior(Module::Max);

    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these fields were Error and now they are Min.
    if (ID->getString() == "branch-target-enforcement" ||
        ID->getString().starts_with("sign-return-address")) {
      if (auto *Behavior =
          // NOTE(review): the mdconst extraction of the behavior constant is
          // missing from this view.
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
              Op->getOperand(1), Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // Upgrade Objective-C Image Info Section. Removed the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that is functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          // Re-join the components with all spaces removed.
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
    // If the higher bits are set, it adds new module flag for swift info.
    if (ID->getString() == "Objective-C Garbage Collection") {
      auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
      if (Md) {
        assert(Md->getValue() && "Expected non-empty metadata");
        auto Type = Md->getValue()->getType();
        // Already the upgraded i8 form; nothing to do.
        if (Type == Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        if ((Val & 0xff) != Val) {
          // Swift version info is packed into the upper bytes; split it out
          // into separate module flags below.
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        // NOTE(review): the first initializer of Ops (the behavior operand)
        // is missing from this view.
        Metadata *Ops[3] = {
            Op->getOperand(1),
            ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
        ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
        Changed = true;
      }
    }

    // Rename the AMDGPU code-object-version flag to its current spelling.
    if (ID->getString() == "amdgpu_code_object_version") {
      Metadata *Ops[3] = {
          Op->getOperand(0),
          MDString::get(M.getContext(), "amdhsa_code_object_version"),
          Op->getOperand(2)};
      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
      Changed = true;
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Module::Error, "Swift ABI Version",
                    SwiftABIVersion);
    M.addModuleFlag(Module::Error, "Swift Major Version",
                    ConstantInt::get(Int8Ty, SwiftMajorVersion));
    M.addModuleFlag(Module::Error, "Swift Minor Version",
                    ConstantInt::get(Int8Ty, SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}
6301
  // Normalize ObjC category-list section names by removing whitespace after
  // commas, so functionally identical sections compare equal.
  // NOTE(review): the enclosing function signature is missing from this view.
  auto TrimSpaces = [](StringRef Section) -> std::string {
    SmallVector<StringRef, 5> Components;
    Section.split(Components, ',');

    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);

    for (auto Component : Components)
      OS << ',' << Component.trim();

    // substr(1) drops the leading comma added before the first component.
    return std::string(OS.str().substr(1));
  };

  for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      continue;

    StringRef Section = GV.getSection();

    if (!Section.starts_with("__DATA, __objc_catlist"))
      continue;

    // __DATA, __objc_catlist, regular, no_dead_strip
    // __DATA,__objc_catlist,regular,no_dead_strip
    GV.setSection(TrimSpaces(Section));
  }
}
6330
namespace {
// Prior to LLVM 10.0, the strictfp attribute could be used on individual
// callsites within a function that did not also have the strictfp attribute.
// Since 10.0, if strict FP semantics are needed within a function, the
// function must have the strictfp attribute and all calls within the function
// must also have the strictfp attribute. This latter restriction is
// necessary to prevent unwanted libcall simplification when a function is
// being cloned (such as for inlining).
//
// The "dangling" strictfp attribute usage was only used to prevent constant
// folding and other libcall simplification. The nobuiltin attribute on the
// callsite has the same effect.
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  StrictFPUpgradeVisitor() = default;

  // Rewrite a dangling callsite-only strictfp attribute as nobuiltin.
  void visitCallBase(CallBase &Call) {
    if (!Call.isStrictFP())
      return;
    // NOTE(review): the condition line preceding this early return (likely
    // checking whether the call is a constrained intrinsic) is missing from
    // this view.
      return;
    // If we get here, the caller doesn't have the strictfp attribute
    // but this callsite does. Replace the strictfp attribute with nobuiltin.
    Call.removeFnAttr(Attribute::StrictFP);
    Call.addFnAttr(Attribute::NoBuiltin);
  }
};

/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
    : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
  AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;

  // Tag every FP atomicrmw with the metadata that replaced the old
  // function-level attribute.
  void visitAtomicRMWInst(AtomicRMWInst &RMW) {
    if (!RMW.isFloatingPointOperation())
      return;

    MDNode *Empty = MDNode::get(RMW.getContext(), {});
    RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
    RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
    RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
  }
};
} // namespace
6374
  // Upgrade legacy function attributes to their modern equivalents.
  // NOTE(review): the enclosing function signature is missing from this view —
  // presumably `void llvm::UpgradeFunctionAttributes(Function &F)`.
  //
  // If a function definition doesn't have the strictfp attribute,
  // convert any callsite strictfp attributes to nobuiltin.
  if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
    StrictFPUpgradeVisitor SFPV;
    SFPV.visit(F);
  }

  // Remove all incompatible attributes from function.
  F.removeRetAttrs(AttributeFuncs::typeIncompatible(
      F.getReturnType(), F.getAttributes().getRetAttrs()));
  for (auto &Arg : F.args())
    Arg.removeAttrs(
        AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));

  // Batch attribute additions/removals and apply them once at the end.
  bool AddingAttrs = false, RemovingAttrs = false;
  AttrBuilder AttrsToAdd(F.getContext());
  AttributeMask AttrsToRemove;

  // Older versions of LLVM treated an "implicit-section-name" attribute
  // similarly to directly setting the section on a Function.
  if (Attribute A = F.getFnAttribute("implicit-section-name");
      A.isValid() && A.isStringAttribute()) {
    F.setSection(A.getValueAsString());
    AttrsToRemove.addAttribute("implicit-section-name");
    RemovingAttrs = true;
  }

  // String "nooutline" becomes the enum NoOutline attribute.
  if (Attribute A = F.getFnAttribute("nooutline");
      A.isValid() && A.isStringAttribute()) {
    AttrsToRemove.addAttribute("nooutline");
    AttrsToAdd.addAttribute(Attribute::NoOutline);
    AddingAttrs = RemovingAttrs = true;
  }

  // "uniform-work-group-size"="true" becomes a valueless attribute;
  // "false" (or any other value) is simply dropped.
  if (Attribute A = F.getFnAttribute("uniform-work-group-size");
      A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
    AttrsToRemove.addAttribute("uniform-work-group-size");
    RemovingAttrs = true;
    if (A.getValueAsString() == "true") {
      AttrsToAdd.addAttribute("uniform-work-group-size");
      AddingAttrs = true;
    }
  }

  if (!F.empty()) {
    // For some reason this is called twice, and the first time is before any
    // instructions are loaded into the body.

    if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
        A.isValid()) {

      if (A.getValueAsBool()) {
        AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
        Visitor.visit(F);
      }

      // We will leave behind dead attribute uses on external declarations, but
      // clang never added these to declarations anyway.
      AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
      RemovingAttrs = true;
    }
  }

  // Fold the two string denormal-mode attributes into the combined
  // denormal-fp-env form.
  DenormalMode DenormalFPMath = DenormalMode::getIEEE();
  DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();

  bool HandleDenormalMode = false;

  if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
    DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
    if (ParsedMode.isValid()) {
      DenormalFPMath = ParsedMode;
      AttrsToRemove.addAttribute("denormal-fp-math");
      AddingAttrs = RemovingAttrs = true;
      HandleDenormalMode = true;
    }
  }

  if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
      Attr.isValid()) {
    DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
    if (ParsedMode.isValid()) {
      DenormalFPMathF32 = ParsedMode;
      AttrsToRemove.addAttribute("denormal-fp-math-f32");
      AddingAttrs = RemovingAttrs = true;
      HandleDenormalMode = true;
    }
  }

  if (HandleDenormalMode)
    AttrsToAdd.addDenormalFPEnvAttr(
        DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));

  if (RemovingAttrs)
    F.removeFnAttrs(AttrsToRemove);

  if (AddingAttrs)
    F.addFnAttrs(AttrsToAdd);
}
6475
// Check if the function attribute is not present and set it.
// An existing attribute (with any value) is left untouched.
// NOTE(review): the first line of this signature is missing from this view —
// presumably `static void setFunctionAttrIfNotSet(Function &F,
// StringRef FnAttrName, ...)`.
                                    StringRef Value) {
  if (!F.hasFnAttribute(FnAttrName))
    F.addFnAttr(FnAttrName, Value);
}
6482
6483// Check if the function attribute is not present and set it if needed.
6484// If the attribute is "false" then removes it.
6485// If the attribute is "true" resets it to a valueless attribute.
6486static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6487 if (!F.hasFnAttribute(FnAttrName)) {
6488 if (Set)
6489 F.addFnAttr(FnAttrName);
6490 } else {
6491 auto A = F.getFnAttribute(FnAttrName);
6492 if ("false" == A.getValueAsString())
6493 F.removeFnAttr(FnAttrName);
6494 else if ("true" == A.getValueAsString()) {
6495 F.removeFnAttr(FnAttrName);
6496 F.addFnAttr(FnAttrName);
6497 }
6498 }
6499}
6500
  // Migrate ARM/AArch64 branch-protection module flags (value 1) to
  // per-function attributes, then re-emit the module flags with value 2 as a
  // marker that the upgrade already happened.
  // NOTE(review): the enclosing function signature is missing from this view.
  Triple T(M.getTargetTriple());
  if (!T.isThumb() && !T.isARM() && !T.isAArch64())
    return;

  uint64_t BTEValue = 0;
  uint64_t BPPLRValue = 0;
  uint64_t GCSValue = 0;
  uint64_t SRAValue = 0;
  uint64_t SRAALLValue = 0;
  uint64_t SRABKeyValue = 0;

  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (ModFlags) {
    for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
      MDNode *Op = ModFlags->getOperand(I);
      if (Op->getNumOperands() != 3)
        continue;

      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
      auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
      if (!ID || !CI)
        continue;

      // Map each recognized flag name to its local accumulator.
      StringRef IDStr = ID->getString();
      uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
                         : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
                         : IDStr == "guarded-control-stack" ? &GCSValue
                         : IDStr == "sign-return-address" ? &SRAValue
                         : IDStr == "sign-return-address-all" ? &SRAALLValue
                         : IDStr == "sign-return-address-with-bkey"
                             ? &SRABKeyValue
                             : nullptr;
      if (!ValPtr)
        continue;

      *ValPtr = CI->getZExtValue();
      // Value 2 marks an already-upgraded module; nothing more to do.
      if (*ValPtr == 2)
        return;
    }
  }

  bool BTE = BTEValue == 1;
  bool BPPLR = BPPLRValue == 1;
  bool GCS = GCSValue == 1;
  bool SRA = SRAValue == 1;

  StringRef SignTypeValue = "non-leaf";
  if (SRA && SRAALLValue == 1)
    SignTypeValue = "all";

  StringRef SignKeyValue = "a_key";
  if (SRA && SRABKeyValue == 1)
    SignKeyValue = "b_key";

  for (Function &F : M.getFunctionList()) {
    if (F.isDeclaration())
      continue;

    if (SRA) {
      setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
      setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
    } else {
      // Drop an explicit "none" so the attribute pair doesn't linger.
      if (auto A = F.getFnAttribute("sign-return-address");
          A.isValid() && "none" == A.getValueAsString()) {
        F.removeFnAttr("sign-return-address");
        F.removeFnAttr("sign-return-address-key");
      }
    }
    ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
    ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
    ConvertFunctionAttr(F, GCS, "guarded-control-stack");
  }

  // Re-emit the flags with value 2 (= "already upgraded") using Min behavior.
  if (BTE)
    M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
  if (BPPLR)
    M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
  if (GCS)
    M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
  if (SRA) {
    M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
    if (SRAALLValue == 1)
      M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
    if (SRABKeyValue == 1)
      M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
  }
}
6589
6590static bool isOldLoopArgument(Metadata *MD) {
6591 auto *T = dyn_cast_or_null<MDTuple>(MD);
6592 if (!T)
6593 return false;
6594 if (T->getNumOperands() < 1)
6595 return false;
6596 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6597 if (!S)
6598 return false;
6599 return S->getString().starts_with("llvm.vectorizer.");
6600}
6601
  // Translate a legacy "llvm.vectorizer.*" loop-metadata tag into its modern
  // "llvm.loop.*" spelling.
  // NOTE(review): the function signature is missing from this view —
  // presumably `static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag)`.
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");

  // "unroll" was renamed to "interleave.count"; everything else keeps its
  // suffix under the "llvm.loop.vectorize." prefix.
  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}
6613
  // Upgrade one loop-metadata operand: if it is a tuple whose first operand is
  // a legacy "llvm.vectorizer." tag, rebuild it with the modern tag; otherwise
  // return it unchanged.
  // NOTE(review): the function signature is missing from this view.
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().starts_with("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  // NOTE(review): the declaration of Ops (a SmallVector<Metadata *>) is
  // missing from this view.
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}
6635
  // Upgrade a whole !llvm.loop node: rebuild it only if at least one operand
  // still uses the legacy "llvm.vectorizer." tags.
  // NOTE(review): the function signature is missing from this view.
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  // Fast path: nothing legacy to upgrade.
  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  // NOTE(review): the declaration of Ops (a SmallVector<Metadata *>) is
  // missing from this view.
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}
6651
  // Upgrade a target data-layout string to the current expectations for the
  // triple. Returns the (possibly rewritten) layout string.
  // NOTE(review): the function signature is missing from this view —
  // presumably `std::string llvm::UpgradeDataLayoutString(StringRef DL,
  // StringRef TT)`.
  Triple T(TT);
  // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
  // the address space of globals to 1. This does not apply to SPIRV Logical.
  if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
      !DL.contains("-G") && !DL.starts_with("G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  if (T.isLoongArch64() || T.isRISCV64()) {
    // Make i32 a native type for 64-bit LoongArch and RISC-V.
    auto I = DL.find("-n64-");
    if (I != StringRef::npos)
      return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
    return DL.str();
  }

  // AMDGPU data layout upgrades.
  std::string Res = DL.str();
  if (T.isAMDGPU()) {
    // Define address spaces for constants.
    if (!DL.contains("-G") && !DL.starts_with("G"))
      Res.append(Res.empty() ? "G1" : "-G1");

    // AMDGCN data layout upgrades.
    if (T.isAMDGCN()) {

      // Add missing non-integral declarations.
      // This goes before adding new address spaces to prevent incoherent string
      // values.
      if (!DL.contains("-ni") && !DL.starts_with("ni"))
        Res.append("-ni:7:8:9");
      // Update ni:7 to ni:7:8:9.
      if (DL.ends_with("ni:7"))
        Res.append(":8:9");
      if (DL.ends_with("ni:7:8"))
        Res.append(":9");

      // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
      // resources) An empty data layout has already been upgraded to G1 by now.
      if (!DL.contains("-p7") && !DL.starts_with("p7"))
        Res.append("-p7:160:256:256:32");
      if (!DL.contains("-p8") && !DL.starts_with("p8"))
        Res.append("-p8:128:128:128:48");
      constexpr StringRef OldP8("-p8:128:128-");
      if (DL.contains(OldP8))
        Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
      if (!DL.contains("-p9") && !DL.starts_with("p9"))
        Res.append("-p9:192:256:256:32");
    }

    // Upgrade the ELF mangling mode.
    if (!DL.contains("m:e"))
      Res = Res.empty() ? "m:e" : "m:e-" + Res;

    return Res;
  }

  if (T.isSystemZ() && !DL.empty()) {
    // Make sure the stack alignment is present.
    if (!DL.contains("-S64"))
      return "E-S64" + DL.drop_front(1).str();
    return DL.str();
  }

  auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
    // If the datalayout matches the expected format, add pointer size address
    // spaces to the datalayout.
    StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
    if (!DL.contains(AddrSpaces)) {
      // NOTE(review): the declaration of Groups (regex match results) is
      // missing from this view.
      Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + AddrSpaces + Groups[3]).str();
    }
  };

  // AArch64 data layout upgrades.
  if (T.isAArch64()) {
    // Add "-Fn32"
    if (!DL.empty() && !DL.contains("-Fn32"))
      Res.append("-Fn32");
    AddPtr32Ptr64AddrSpaces();
    return Res;
  }

  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
      T.isWasm()) {
    // Mips64 with o32 ABI did not add "-i128:128".
    // Add "-i128:128"
    std::string I64 = "-i64:64";
    std::string I128 = "-i128:128";
    if (!StringRef(Res).contains(I128)) {
      size_t Pos = Res.find(I64);
      if (Pos != size_t(-1))
        Res.insert(Pos + I64.size(), I128);
    }
  }

  // AIX PPC needs the f64 alignment entry; insert it before -S128 if present.
  if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
    size_t Pos = Res.find("-S128");
    if (Pos == StringRef::npos)
      Pos = Res.size();
    Res.insert(Pos, "-f64:32:64");
  }

  if (!T.isX86())
    return Res;

  AddPtr32Ptr64AddrSpaces();

  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
  // for i128 operations prior to this being reflected in the data layout, and
  // clang mostly produced LLVM IR that already aligned i128 to 16 byte
  // boundaries, so although this is a breaking change, the upgrade is expected
  // to fix more IR than it breaks.
  // Intel MCU is an exception and uses 4-byte-alignment.
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    if (StringRef Ref = Res; !Ref.contains(I128)) {
      // NOTE(review): the declaration of Groups (regex match results) is
      // missing from this view.
      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + I128 + Groups[3]).str();
    }
  }

  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
  // Raising the alignment is safe because Clang did not produce f80 values in
  // the MSVC environment before this upgrade was added.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find("-f80:32-");
    if (I != StringRef::npos)
      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
  }

  return Res;
}
6791
6792void llvm::UpgradeAttributes(AttrBuilder &B) {
6793 StringRef FramePointer;
6794 Attribute A = B.getAttribute("no-frame-pointer-elim");
6795 if (A.isValid()) {
6796 // The value can be "true" or "false".
6797 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6798 B.removeAttribute("no-frame-pointer-elim");
6799 }
6800 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6801 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6802 if (FramePointer != "all")
6803 FramePointer = "non-leaf";
6804 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6805 }
6806 if (!FramePointer.empty())
6807 B.addAttribute("frame-pointer", FramePointer);
6808
6809 A = B.getAttribute("null-pointer-is-valid");
6810 if (A.isValid()) {
6811 // The value can be "true" or "false".
6812 bool NullPointerIsValid = A.getValueAsString() == "true";
6813 B.removeAttribute("null-pointer-is-valid");
6814 if (NullPointerIsValid)
6815 B.addAttribute(Attribute::NullPointerIsValid);
6816 }
6817
6818 A = B.getAttribute("uniform-work-group-size");
6819 if (A.isValid()) {
6820 StringRef Val = A.getValueAsString();
6821 if (!Val.empty()) {
6822 bool IsTrue = Val == "true";
6823 B.removeAttribute("uniform-work-group-size");
6824 if (IsTrue)
6825 B.addAttribute("uniform-work-group-size");
6826 }
6827 }
6828}
6829
6830void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6831 // clang.arc.attachedcall bundles are now required to have an operand.
6832 // If they don't, it's okay to drop them entirely: when there is an operand,
6833 // the "attachedcall" is meaningful and required, but without an operand,
6834 // it's just a marker NOP. Dropping it merely prevents an optimization.
6835 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6836 return OBD.getTag() == "clang.arc.attachedcall" &&
6837 OBD.inputs().empty();
6838 });
6839}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define R2(n)
This file contains the declarations for metadata subclasses.
#define T
#define T1
NVPTX address space definition.
uint64_t High
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setApproxFunc(bool B=true)
Definition FMF.h:96
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
const Function & getFunction() const
Definition Function.h:166
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Function.cpp:449
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
Argument * getArg(unsigned i) const
Definition Function.h:886
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:622
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
LLVMContext & getContext() const
Definition Metadata.h:1244
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
A single uniqued string.
Definition Metadata.h:722
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1529
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition Module.h:138
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition Module.h:120
@ Min
Takes the min of the two values, which are required to be integers.
Definition Module.h:152
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
A tuple of MDNodes.
Definition Metadata.h:1760
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
Definition Metadata.h:1856
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
StringRef getTag() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:895
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
static constexpr size_t npos
Definition StringRef.h:57
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:844
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:483
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:311
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:287
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:288
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:427
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:347
bool hasName() const
Definition Value.h:262
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type contraints specified by the ....
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:328
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
Op::Description Desc
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition Metadata.h:54
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106