34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
63 cl::desc(
"Disable autoupgrade of debug info"));
82 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
97 Type *LastArgType =
F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
113 if (
F->getReturnType()->isVectorTy())
126 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
127 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
144 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
145 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
159 if (
F->getReturnType()->getScalarType()->isBFloatTy())
169 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
183 if (Name.consume_front(
"avx."))
184 return (Name.starts_with(
"blend.p") ||
185 Name ==
"cvt.ps2.pd.256" ||
186 Name ==
"cvtdq2.pd.256" ||
187 Name ==
"cvtdq2.ps.256" ||
188 Name.starts_with(
"movnt.") ||
189 Name.starts_with(
"sqrt.p") ||
190 Name.starts_with(
"storeu.") ||
191 Name.starts_with(
"vbroadcast.s") ||
192 Name.starts_with(
"vbroadcastf128") ||
193 Name.starts_with(
"vextractf128.") ||
194 Name.starts_with(
"vinsertf128.") ||
195 Name.starts_with(
"vperm2f128.") ||
196 Name.starts_with(
"vpermil."));
198 if (Name.consume_front(
"avx2."))
199 return (Name ==
"movntdqa" ||
200 Name.starts_with(
"pabs.") ||
201 Name.starts_with(
"padds.") ||
202 Name.starts_with(
"paddus.") ||
203 Name.starts_with(
"pblendd.") ||
205 Name.starts_with(
"pbroadcast") ||
206 Name.starts_with(
"pcmpeq.") ||
207 Name.starts_with(
"pcmpgt.") ||
208 Name.starts_with(
"pmax") ||
209 Name.starts_with(
"pmin") ||
210 Name.starts_with(
"pmovsx") ||
211 Name.starts_with(
"pmovzx") ||
213 Name ==
"pmulu.dq" ||
214 Name.starts_with(
"psll.dq") ||
215 Name.starts_with(
"psrl.dq") ||
216 Name.starts_with(
"psubs.") ||
217 Name.starts_with(
"psubus.") ||
218 Name.starts_with(
"vbroadcast") ||
219 Name ==
"vbroadcasti128" ||
220 Name ==
"vextracti128" ||
221 Name ==
"vinserti128" ||
222 Name ==
"vperm2i128");
224 if (Name.consume_front(
"avx512.")) {
225 if (Name.consume_front(
"mask."))
227 return (Name.starts_with(
"add.p") ||
228 Name.starts_with(
"and.") ||
229 Name.starts_with(
"andn.") ||
230 Name.starts_with(
"broadcast.s") ||
231 Name.starts_with(
"broadcastf32x4.") ||
232 Name.starts_with(
"broadcastf32x8.") ||
233 Name.starts_with(
"broadcastf64x2.") ||
234 Name.starts_with(
"broadcastf64x4.") ||
235 Name.starts_with(
"broadcasti32x4.") ||
236 Name.starts_with(
"broadcasti32x8.") ||
237 Name.starts_with(
"broadcasti64x2.") ||
238 Name.starts_with(
"broadcasti64x4.") ||
239 Name.starts_with(
"cmp.b") ||
240 Name.starts_with(
"cmp.d") ||
241 Name.starts_with(
"cmp.q") ||
242 Name.starts_with(
"cmp.w") ||
243 Name.starts_with(
"compress.b") ||
244 Name.starts_with(
"compress.d") ||
245 Name.starts_with(
"compress.p") ||
246 Name.starts_with(
"compress.q") ||
247 Name.starts_with(
"compress.store.") ||
248 Name.starts_with(
"compress.w") ||
249 Name.starts_with(
"conflict.") ||
250 Name.starts_with(
"cvtdq2pd.") ||
251 Name.starts_with(
"cvtdq2ps.") ||
252 Name ==
"cvtpd2dq.256" ||
253 Name ==
"cvtpd2ps.256" ||
254 Name ==
"cvtps2pd.128" ||
255 Name ==
"cvtps2pd.256" ||
256 Name.starts_with(
"cvtqq2pd.") ||
257 Name ==
"cvtqq2ps.256" ||
258 Name ==
"cvtqq2ps.512" ||
259 Name ==
"cvttpd2dq.256" ||
260 Name ==
"cvttps2dq.128" ||
261 Name ==
"cvttps2dq.256" ||
262 Name.starts_with(
"cvtudq2pd.") ||
263 Name.starts_with(
"cvtudq2ps.") ||
264 Name.starts_with(
"cvtuqq2pd.") ||
265 Name ==
"cvtuqq2ps.256" ||
266 Name ==
"cvtuqq2ps.512" ||
267 Name.starts_with(
"dbpsadbw.") ||
268 Name.starts_with(
"div.p") ||
269 Name.starts_with(
"expand.b") ||
270 Name.starts_with(
"expand.d") ||
271 Name.starts_with(
"expand.load.") ||
272 Name.starts_with(
"expand.p") ||
273 Name.starts_with(
"expand.q") ||
274 Name.starts_with(
"expand.w") ||
275 Name.starts_with(
"fpclass.p") ||
276 Name.starts_with(
"insert") ||
277 Name.starts_with(
"load.") ||
278 Name.starts_with(
"loadu.") ||
279 Name.starts_with(
"lzcnt.") ||
280 Name.starts_with(
"max.p") ||
281 Name.starts_with(
"min.p") ||
282 Name.starts_with(
"movddup") ||
283 Name.starts_with(
"move.s") ||
284 Name.starts_with(
"movshdup") ||
285 Name.starts_with(
"movsldup") ||
286 Name.starts_with(
"mul.p") ||
287 Name.starts_with(
"or.") ||
288 Name.starts_with(
"pabs.") ||
289 Name.starts_with(
"packssdw.") ||
290 Name.starts_with(
"packsswb.") ||
291 Name.starts_with(
"packusdw.") ||
292 Name.starts_with(
"packuswb.") ||
293 Name.starts_with(
"padd.") ||
294 Name.starts_with(
"padds.") ||
295 Name.starts_with(
"paddus.") ||
296 Name.starts_with(
"palignr.") ||
297 Name.starts_with(
"pand.") ||
298 Name.starts_with(
"pandn.") ||
299 Name.starts_with(
"pavg") ||
300 Name.starts_with(
"pbroadcast") ||
301 Name.starts_with(
"pcmpeq.") ||
302 Name.starts_with(
"pcmpgt.") ||
303 Name.starts_with(
"perm.df.") ||
304 Name.starts_with(
"perm.di.") ||
305 Name.starts_with(
"permvar.") ||
306 Name.starts_with(
"pmaddubs.w.") ||
307 Name.starts_with(
"pmaddw.d.") ||
308 Name.starts_with(
"pmax") ||
309 Name.starts_with(
"pmin") ||
310 Name ==
"pmov.qd.256" ||
311 Name ==
"pmov.qd.512" ||
312 Name ==
"pmov.wb.256" ||
313 Name ==
"pmov.wb.512" ||
314 Name.starts_with(
"pmovsx") ||
315 Name.starts_with(
"pmovzx") ||
316 Name.starts_with(
"pmul.dq.") ||
317 Name.starts_with(
"pmul.hr.sw.") ||
318 Name.starts_with(
"pmulh.w.") ||
319 Name.starts_with(
"pmulhu.w.") ||
320 Name.starts_with(
"pmull.") ||
321 Name.starts_with(
"pmultishift.qb.") ||
322 Name.starts_with(
"pmulu.dq.") ||
323 Name.starts_with(
"por.") ||
324 Name.starts_with(
"prol.") ||
325 Name.starts_with(
"prolv.") ||
326 Name.starts_with(
"pror.") ||
327 Name.starts_with(
"prorv.") ||
328 Name.starts_with(
"pshuf.b.") ||
329 Name.starts_with(
"pshuf.d.") ||
330 Name.starts_with(
"pshufh.w.") ||
331 Name.starts_with(
"pshufl.w.") ||
332 Name.starts_with(
"psll.d") ||
333 Name.starts_with(
"psll.q") ||
334 Name.starts_with(
"psll.w") ||
335 Name.starts_with(
"pslli") ||
336 Name.starts_with(
"psllv") ||
337 Name.starts_with(
"psra.d") ||
338 Name.starts_with(
"psra.q") ||
339 Name.starts_with(
"psra.w") ||
340 Name.starts_with(
"psrai") ||
341 Name.starts_with(
"psrav") ||
342 Name.starts_with(
"psrl.d") ||
343 Name.starts_with(
"psrl.q") ||
344 Name.starts_with(
"psrl.w") ||
345 Name.starts_with(
"psrli") ||
346 Name.starts_with(
"psrlv") ||
347 Name.starts_with(
"psub.") ||
348 Name.starts_with(
"psubs.") ||
349 Name.starts_with(
"psubus.") ||
350 Name.starts_with(
"pternlog.") ||
351 Name.starts_with(
"punpckh") ||
352 Name.starts_with(
"punpckl") ||
353 Name.starts_with(
"pxor.") ||
354 Name.starts_with(
"shuf.f") ||
355 Name.starts_with(
"shuf.i") ||
356 Name.starts_with(
"shuf.p") ||
357 Name.starts_with(
"sqrt.p") ||
358 Name.starts_with(
"store.b.") ||
359 Name.starts_with(
"store.d.") ||
360 Name.starts_with(
"store.p") ||
361 Name.starts_with(
"store.q.") ||
362 Name.starts_with(
"store.w.") ||
363 Name ==
"store.ss" ||
364 Name.starts_with(
"storeu.") ||
365 Name.starts_with(
"sub.p") ||
366 Name.starts_with(
"ucmp.") ||
367 Name.starts_with(
"unpckh.") ||
368 Name.starts_with(
"unpckl.") ||
369 Name.starts_with(
"valign.") ||
370 Name ==
"vcvtph2ps.128" ||
371 Name ==
"vcvtph2ps.256" ||
372 Name.starts_with(
"vextract") ||
373 Name.starts_with(
"vfmadd.") ||
374 Name.starts_with(
"vfmaddsub.") ||
375 Name.starts_with(
"vfnmadd.") ||
376 Name.starts_with(
"vfnmsub.") ||
377 Name.starts_with(
"vpdpbusd.") ||
378 Name.starts_with(
"vpdpbusds.") ||
379 Name.starts_with(
"vpdpwssd.") ||
380 Name.starts_with(
"vpdpwssds.") ||
381 Name.starts_with(
"vpermi2var.") ||
382 Name.starts_with(
"vpermil.p") ||
383 Name.starts_with(
"vpermilvar.") ||
384 Name.starts_with(
"vpermt2var.") ||
385 Name.starts_with(
"vpmadd52") ||
386 Name.starts_with(
"vpshld.") ||
387 Name.starts_with(
"vpshldv.") ||
388 Name.starts_with(
"vpshrd.") ||
389 Name.starts_with(
"vpshrdv.") ||
390 Name.starts_with(
"vpshufbitqmb.") ||
391 Name.starts_with(
"xor."));
393 if (Name.consume_front(
"mask3."))
395 return (Name.starts_with(
"vfmadd.") ||
396 Name.starts_with(
"vfmaddsub.") ||
397 Name.starts_with(
"vfmsub.") ||
398 Name.starts_with(
"vfmsubadd.") ||
399 Name.starts_with(
"vfnmsub."));
401 if (Name.consume_front(
"maskz."))
403 return (Name.starts_with(
"pternlog.") ||
404 Name.starts_with(
"vfmadd.") ||
405 Name.starts_with(
"vfmaddsub.") ||
406 Name.starts_with(
"vpdpbusd.") ||
407 Name.starts_with(
"vpdpbusds.") ||
408 Name.starts_with(
"vpdpwssd.") ||
409 Name.starts_with(
"vpdpwssds.") ||
410 Name.starts_with(
"vpermt2var.") ||
411 Name.starts_with(
"vpmadd52") ||
412 Name.starts_with(
"vpshldv.") ||
413 Name.starts_with(
"vpshrdv."));
416 return (Name ==
"movntdqa" ||
417 Name ==
"pmul.dq.512" ||
418 Name ==
"pmulu.dq.512" ||
419 Name.starts_with(
"broadcastm") ||
420 Name.starts_with(
"cmp.p") ||
421 Name.starts_with(
"cvtb2mask.") ||
422 Name.starts_with(
"cvtd2mask.") ||
423 Name.starts_with(
"cvtmask2") ||
424 Name.starts_with(
"cvtq2mask.") ||
425 Name ==
"cvtusi2sd" ||
426 Name.starts_with(
"cvtw2mask.") ||
431 Name ==
"kortestc.w" ||
432 Name ==
"kortestz.w" ||
433 Name.starts_with(
"kunpck") ||
436 Name.starts_with(
"padds.") ||
437 Name.starts_with(
"pbroadcast") ||
438 Name.starts_with(
"prol") ||
439 Name.starts_with(
"pror") ||
440 Name.starts_with(
"psll.dq") ||
441 Name.starts_with(
"psrl.dq") ||
442 Name.starts_with(
"psubs.") ||
443 Name.starts_with(
"ptestm") ||
444 Name.starts_with(
"ptestnm") ||
445 Name.starts_with(
"storent.") ||
446 Name.starts_with(
"vbroadcast.s") ||
447 Name.starts_with(
"vpshld.") ||
448 Name.starts_with(
"vpshrd."));
451 if (Name.consume_front(
"fma."))
452 return (Name.starts_with(
"vfmadd.") ||
453 Name.starts_with(
"vfmsub.") ||
454 Name.starts_with(
"vfmsubadd.") ||
455 Name.starts_with(
"vfnmadd.") ||
456 Name.starts_with(
"vfnmsub."));
458 if (Name.consume_front(
"fma4."))
459 return Name.starts_with(
"vfmadd.s");
461 if (Name.consume_front(
"sse."))
462 return (Name ==
"add.ss" ||
463 Name ==
"cvtsi2ss" ||
464 Name ==
"cvtsi642ss" ||
467 Name.starts_with(
"sqrt.p") ||
469 Name.starts_with(
"storeu.") ||
472 if (Name.consume_front(
"sse2."))
473 return (Name ==
"add.sd" ||
474 Name ==
"cvtdq2pd" ||
475 Name ==
"cvtdq2ps" ||
476 Name ==
"cvtps2pd" ||
477 Name ==
"cvtsi2sd" ||
478 Name ==
"cvtsi642sd" ||
479 Name ==
"cvtss2sd" ||
482 Name.starts_with(
"padds.") ||
483 Name.starts_with(
"paddus.") ||
484 Name.starts_with(
"pcmpeq.") ||
485 Name.starts_with(
"pcmpgt.") ||
490 Name ==
"pmulu.dq" ||
491 Name.starts_with(
"pshuf") ||
492 Name.starts_with(
"psll.dq") ||
493 Name.starts_with(
"psrl.dq") ||
494 Name.starts_with(
"psubs.") ||
495 Name.starts_with(
"psubus.") ||
496 Name.starts_with(
"sqrt.p") ||
498 Name ==
"storel.dq" ||
499 Name.starts_with(
"storeu.") ||
502 if (Name.consume_front(
"sse41."))
503 return (Name.starts_with(
"blendp") ||
504 Name ==
"movntdqa" ||
514 Name.starts_with(
"pmovsx") ||
515 Name.starts_with(
"pmovzx") ||
518 if (Name.consume_front(
"sse42."))
519 return Name ==
"crc32.64.8";
521 if (Name.consume_front(
"sse4a."))
522 return Name.starts_with(
"movnt.");
524 if (Name.consume_front(
"ssse3."))
525 return (Name ==
"pabs.b.128" ||
526 Name ==
"pabs.d.128" ||
527 Name ==
"pabs.w.128");
529 if (Name.consume_front(
"xop."))
530 return (Name ==
"vpcmov" ||
531 Name ==
"vpcmov.256" ||
532 Name.starts_with(
"vpcom") ||
533 Name.starts_with(
"vprot"));
535 return (Name ==
"addcarry.u32" ||
536 Name ==
"addcarry.u64" ||
537 Name ==
"addcarryx.u32" ||
538 Name ==
"addcarryx.u64" ||
539 Name ==
"subborrow.u32" ||
540 Name ==
"subborrow.u64" ||
541 Name.starts_with(
"vcvtph2ps."));
547 if (!Name.consume_front(
"x86."))
555 if (Name ==
"rdtscp") {
557 if (
F->getFunctionType()->getNumParams() == 0)
562 Intrinsic::x86_rdtscp);
569 if (Name.consume_front(
"sse41.ptest")) {
571 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
572 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
573 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
586 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
596 if (Name.consume_front(
"avx512.")) {
597 if (Name.consume_front(
"mask.cmp.")) {
600 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
609 }
else if (Name.starts_with(
"vpdpbusd.") ||
610 Name.starts_with(
"vpdpbusds.")) {
613 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
622 }
else if (Name.starts_with(
"vpdpwssd.") ||
623 Name.starts_with(
"vpdpwssds.")) {
626 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
639 if (Name.consume_front(
"avx2.")) {
640 if (Name.consume_front(
"vpdpb")) {
643 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
658 }
else if (Name.consume_front(
"vpdpw")) {
661 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
680 if (Name.consume_front(
"avx10.")) {
681 if (Name.consume_front(
"vpdpb")) {
684 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
693 }
else if (Name.consume_front(
"vpdpw")) {
695 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
708 if (Name.consume_front(
"avx512bf16.")) {
711 .
Case(
"cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .
Case(
"cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .
Case(
"cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .
Case(
"mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .
Case(
"cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .
Case(
"cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
729 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
738 if (Name.consume_front(
"xop.")) {
740 if (Name.starts_with(
"vpermil2")) {
743 auto Idx =
F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
756 }
else if (
F->arg_size() == 2)
759 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
771 if (Name ==
"seh.recoverfp") {
773 Intrinsic::eh_recoverfp);
785 if (Name.starts_with(
"rbit")) {
788 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
792 if (Name ==
"thread.pointer") {
795 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
799 bool Neon = Name.consume_front(
"neon.");
804 if (Name.consume_front(
"bfdot.")) {
808 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
813 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
814 assert((OperandWidth == 64 || OperandWidth == 128) &&
815 "Unexpected operand width");
817 std::array<Type *, 2> Tys{
828 if (Name.consume_front(
"bfm")) {
830 if (Name.consume_back(
".v4f32.v16i8")) {
876 F->arg_begin()->getType());
880 if (Name.consume_front(
"vst")) {
882 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
891 Intrinsic::arm_neon_vst4lane};
893 auto fArgs =
F->getFunctionType()->params();
894 Type *Tys[] = {fArgs[0], fArgs[1]};
897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
909 if (Name.consume_front(
"mve.")) {
911 if (Name ==
"vctp64") {
921 if (Name.starts_with(
"vrintn.v")) {
923 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
928 if (Name.consume_back(
".v4i1")) {
930 if (Name.consume_back(
".predicated.v2i64.v4i32"))
932 return Name ==
"mull.int" || Name ==
"vqdmull";
934 if (Name.consume_back(
".v2i64")) {
936 bool IsGather = Name.consume_front(
"vldr.gather.");
937 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
938 if (Name.consume_front(
"base.")) {
940 Name.consume_front(
"wb.");
943 return Name ==
"predicated.v2i64";
946 if (Name.consume_front(
"offset.predicated."))
947 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
948 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
961 if (Name.consume_front(
"cde.vcx")) {
963 if (Name.consume_back(
".predicated.v2i64.v4i1"))
965 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
966 Name ==
"3q" || Name ==
"3qa";
980 F->arg_begin()->getType());
984 if (Name.starts_with(
"addp")) {
986 if (
F->arg_size() != 2)
989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
997 if (Name.starts_with(
"bfcvt")) {
1004 if (Name.consume_front(
"sve.")) {
1006 if (Name.consume_front(
"bf")) {
1007 if (Name.consume_back(
".lane")) {
1011 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1012 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1013 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1025 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1030 if (Name.consume_front(
"addqv")) {
1032 if (!
F->getReturnType()->isFPOrFPVectorTy())
1035 auto Args =
F->getFunctionType()->params();
1036 Type *Tys[] = {
F->getReturnType(), Args[1]};
1038 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1042 if (Name.consume_front(
"ld")) {
1044 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1045 if (LdRegex.
match(Name)) {
1052 Intrinsic::aarch64_sve_ld2_sret,
1053 Intrinsic::aarch64_sve_ld3_sret,
1054 Intrinsic::aarch64_sve_ld4_sret,
1057 LoadIDs[Name[0] -
'2'], Ty);
1063 if (Name.consume_front(
"tuple.")) {
1065 if (Name.starts_with(
"get")) {
1067 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1069 F->getParent(), Intrinsic::vector_extract, Tys);
1073 if (Name.starts_with(
"set")) {
1075 auto Args =
F->getFunctionType()->params();
1076 Type *Tys[] = {Args[0], Args[2], Args[1]};
1078 F->getParent(), Intrinsic::vector_insert, Tys);
1082 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1083 if (CreateTupleRegex.
match(Name)) {
1085 auto Args =
F->getFunctionType()->params();
1086 Type *Tys[] = {
F->getReturnType(), Args[1]};
1088 F->getParent(), Intrinsic::vector_insert, Tys);
1094 if (Name.starts_with(
"rev.nxv")) {
1097 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1109 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1113 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1115 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1117 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1118 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1119 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1120 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1121 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1122 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1131 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1145 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1146 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1156 if (Name.consume_front(
"mapa.shared.cluster"))
1157 if (
F->getReturnType()->getPointerAddressSpace() ==
1159 return Intrinsic::nvvm_mapa_shared_cluster;
1161 if (Name.consume_front(
"cp.async.bulk.")) {
1164 .
Case(
"global.to.shared.cluster",
1165 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1166 .
Case(
"shared.cta.to.cluster",
1167 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1171 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1180 if (Name.consume_front(
"fma.rn."))
1182 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1183 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1184 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1185 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1188 if (Name.consume_front(
"fmax."))
1190 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1191 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1192 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1193 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1194 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1195 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1196 .
Case(
"ftz.nan.xorsign.abs.bf16",
1197 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1198 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1199 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1200 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1201 .
Case(
"ftz.xorsign.abs.bf16x2",
1202 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1203 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1204 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1205 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1206 .
Case(
"nan.xorsign.abs.bf16x2",
1207 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1208 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1209 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1212 if (Name.consume_front(
"fmin."))
1214 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1215 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1216 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1217 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1218 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1219 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1220 .
Case(
"ftz.nan.xorsign.abs.bf16",
1221 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1222 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1223 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1224 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1225 .
Case(
"ftz.xorsign.abs.bf16x2",
1226 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1227 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1228 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1229 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1230 .
Case(
"nan.xorsign.abs.bf16x2",
1231 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1232 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1233 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1236 if (Name.consume_front(
"neg."))
1238 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1239 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1246 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1247 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1248 Name.consume_front(
"param");
1254 if (Name.starts_with(
"to.fp16")) {
1258 FuncTy->getReturnType());
1261 if (Name.starts_with(
"from.fp16")) {
1265 FuncTy->getReturnType());
1272 bool CanUpgradeDebugIntrinsicsToRecords) {
1273 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1278 if (!Name.consume_front(
"llvm.") || Name.empty())
1284 bool IsArm = Name.consume_front(
"arm.");
1285 if (IsArm || Name.consume_front(
"aarch64.")) {
1291 if (Name.consume_front(
"amdgcn.")) {
1292 if (Name ==
"alignbit") {
1295 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1299 if (Name.consume_front(
"atomic.")) {
1300 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1301 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1310 switch (
F->getIntrinsicID()) {
1314 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1315 if (
F->arg_size() == 7) {
1320 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1321 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1322 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1323 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1324 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1325 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1326 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1327 if (
F->arg_size() == 8) {
1334 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1335 Name.consume_front(
"flat.atomic.")) {
1336 if (Name.starts_with(
"fadd") ||
1338 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1339 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1347 if (Name.starts_with(
"ldexp.")) {
1350 F->getParent(), Intrinsic::ldexp,
1351 {F->getReturnType(), F->getArg(1)->getType()});
1360 if (
F->arg_size() == 1) {
1361 if (Name.consume_front(
"convert.")) {
1375 F->arg_begin()->getType());
1380 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1383 Intrinsic::coro_end);
1390 if (Name.consume_front(
"dbg.")) {
1392 if (CanUpgradeDebugIntrinsicsToRecords) {
1393 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1394 Name ==
"declare" || Name ==
"label") {
1403 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1406 Intrinsic::dbg_value);
1413 if (Name.consume_front(
"experimental.vector.")) {
1419 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1420 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1421 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1422 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1423 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1425 Intrinsic::vector_partial_reduce_add)
1428 const auto *FT =
F->getFunctionType();
1430 if (
ID == Intrinsic::vector_extract ||
1431 ID == Intrinsic::vector_interleave2)
1434 if (
ID != Intrinsic::vector_interleave2)
1436 if (
ID == Intrinsic::vector_insert ||
1437 ID == Intrinsic::vector_partial_reduce_add)
1445 if (Name.consume_front(
"reduce.")) {
1447 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1448 if (R.match(Name, &
Groups))
1450 .
Case(
"add", Intrinsic::vector_reduce_add)
1451 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1452 .
Case(
"and", Intrinsic::vector_reduce_and)
1453 .
Case(
"or", Intrinsic::vector_reduce_or)
1454 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1455 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1456 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1457 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1458 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1459 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1460 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1465 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1470 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1471 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1476 auto Args =
F->getFunctionType()->params();
1478 {Args[V2 ? 1 : 0]});
1484 if (Name.consume_front(
"splice"))
1488 if (Name.consume_front(
"experimental.stepvector.")) {
1492 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1497 if (Name.starts_with(
"flt.rounds")) {
1500 Intrinsic::get_rounding);
1505 if (Name.starts_with(
"invariant.group.barrier")) {
1507 auto Args =
F->getFunctionType()->params();
1508 Type* ObjectPtr[1] = {Args[0]};
1511 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1516 if ((Name.starts_with(
"lifetime.start") ||
1517 Name.starts_with(
"lifetime.end")) &&
1518 F->arg_size() == 2) {
1520 ? Intrinsic::lifetime_start
1521 : Intrinsic::lifetime_end;
1524 F->getArg(0)->getType());
1533 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1534 .StartsWith(
"memmove.", Intrinsic::memmove)
1536 if (
F->arg_size() == 5) {
1540 F->getFunctionType()->params().slice(0, 3);
1546 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1549 const auto *FT =
F->getFunctionType();
1550 Type *ParamTypes[2] = {
1551 FT->getParamType(0),
1555 Intrinsic::memset, ParamTypes);
1561 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1562 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1563 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1564 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1566 if (MaskedID &&
F->arg_size() == 4) {
1568 if (MaskedID == Intrinsic::masked_load ||
1569 MaskedID == Intrinsic::masked_gather) {
1571 F->getParent(), MaskedID,
1572 {F->getReturnType(), F->getArg(0)->getType()});
1576 F->getParent(), MaskedID,
1577 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1583 if (Name.consume_front(
"nvvm.")) {
1585 if (
F->arg_size() == 1) {
1588 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1589 .Case(
"clz.i", Intrinsic::ctlz)
1590 .
Case(
"popc.i", Intrinsic::ctpop)
1594 {F->getReturnType()});
1597 }
else if (
F->arg_size() == 2) {
1600 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1601 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1602 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1603 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1607 {F->getReturnType()});
1613 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1641 bool Expand =
false;
1642 if (Name.consume_front(
"abs."))
1645 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1646 else if (Name.consume_front(
"fabs."))
1648 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1649 else if (Name.consume_front(
"ex2.approx."))
1652 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1653 else if (Name.consume_front(
"atomic.load."))
1662 else if (Name.consume_front(
"bitcast."))
1665 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1666 else if (Name.consume_front(
"rotate."))
1668 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1669 else if (Name.consume_front(
"ptr.gen.to."))
1672 else if (Name.consume_front(
"ptr."))
1675 else if (Name.consume_front(
"ldg.global."))
1677 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1678 Name.starts_with(
"p."));
1681 .
Case(
"barrier0",
true)
1682 .
Case(
"barrier.n",
true)
1683 .
Case(
"barrier.sync.cnt",
true)
1684 .
Case(
"barrier.sync",
true)
1685 .
Case(
"barrier",
true)
1686 .
Case(
"bar.sync",
true)
1687 .
Case(
"barrier0.popc",
true)
1688 .
Case(
"barrier0.and",
true)
1689 .
Case(
"barrier0.or",
true)
1690 .
Case(
"clz.ll",
true)
1691 .
Case(
"popc.ll",
true)
1693 .
Case(
"swap.lo.hi.b64",
true)
1694 .
Case(
"tanh.approx.f32",
true)
1706 if (Name.starts_with(
"objectsize.")) {
1707 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1708 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1711 Intrinsic::objectsize, Tys);
1718 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1721 F->getParent(), Intrinsic::ptr_annotation,
1722 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1728 if (Name.consume_front(
"riscv.")) {
1731 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1732 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1733 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1734 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1737 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1750 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1751 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1760 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1761 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1762 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1763 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1768 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1777 if (Name ==
"clmul.i32" || Name ==
"clmul.i64") {
1779 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1788 if (Name ==
"stackprotectorcheck") {
1795 if (Name ==
"thread.pointer") {
1797 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1803 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1806 F->getParent(), Intrinsic::var_annotation,
1807 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1810 if (Name.consume_front(
"vector.splice")) {
1811 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1819 if (Name.consume_front(
"wasm.")) {
1822 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1823 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1824 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1829 F->getReturnType());
1833 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1835 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1837 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1856 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1865 auto *FT =
F->getFunctionType();
1868 std::string
Name =
F->getName().str();
1871 Name,
F->getParent());
1882 if (Result != std::nullopt) {
1895 bool CanUpgradeDebugIntrinsicsToRecords) {
1915 GV->
getName() ==
"llvm.global_dtors")) ||
1930 unsigned N =
Init->getNumOperands();
1931 std::vector<Constant *> NewCtors(
N);
1932 for (
unsigned i = 0; i !=
N; ++i) {
1935 Ctor->getAggregateElement(1),
1949 unsigned NumElts = ResultTy->getNumElements() * 8;
1953 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1963 for (
unsigned l = 0; l != NumElts; l += 16)
1964 for (
unsigned i = 0; i != 16; ++i) {
1965 unsigned Idx = NumElts + i - Shift;
1967 Idx -= NumElts - 16;
1968 Idxs[l + i] = Idx + l;
1971 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
1975 return Builder.CreateBitCast(Res, ResultTy,
"cast");
1983 unsigned NumElts = ResultTy->getNumElements() * 8;
1987 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1997 for (
unsigned l = 0; l != NumElts; l += 16)
1998 for (
unsigned i = 0; i != 16; ++i) {
1999 unsigned Idx = i + Shift;
2001 Idx += NumElts - 16;
2002 Idxs[l + i] = Idx + l;
2005 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
2009 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2017 Mask = Builder.CreateBitCast(Mask, MaskTy);
2023 for (
unsigned i = 0; i != NumElts; ++i)
2025 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2036 if (
C->isAllOnesValue())
2041 return Builder.CreateSelect(Mask, Op0, Op1);
2048 if (
C->isAllOnesValue())
2052 Mask->getType()->getIntegerBitWidth());
2053 Mask = Builder.CreateBitCast(Mask, MaskTy);
2054 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2055 return Builder.CreateSelect(Mask, Op0, Op1);
2068 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2069 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2074 ShiftVal &= (NumElts - 1);
2083 if (ShiftVal > 16) {
2091 for (
unsigned l = 0; l < NumElts; l += 16) {
2092 for (
unsigned i = 0; i != 16; ++i) {
2093 unsigned Idx = ShiftVal + i;
2094 if (!IsVALIGN && Idx >= 16)
2095 Idx += NumElts - 16;
2096 Indices[l + i] = Idx + l;
2101 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2107 bool ZeroMask,
bool IndexForm) {
2110 unsigned EltWidth = Ty->getScalarSizeInBits();
2111 bool IsFloat = Ty->isFPOrFPVectorTy();
2113 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2114 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2115 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2116 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2117 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2118 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2119 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2120 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2121 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2122 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2123 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2124 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2125 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2126 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2127 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2128 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2129 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2130 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2131 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2132 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2133 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2134 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2135 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2136 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2137 else if (VecWidth == 128 && EltWidth == 16)
2138 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2139 else if (VecWidth == 256 && EltWidth == 16)
2140 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2141 else if (VecWidth == 512 && EltWidth == 16)
2142 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2143 else if (VecWidth == 128 && EltWidth == 8)
2144 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2145 else if (VecWidth == 256 && EltWidth == 8)
2146 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2147 else if (VecWidth == 512 && EltWidth == 8)
2148 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2159 Value *V = Builder.CreateIntrinsic(IID, Args);
2171 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2182 bool IsRotateRight) {
2192 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2193 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2196 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2197 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2242 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2247 bool IsShiftRight,
bool ZeroMask) {
2261 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2262 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2265 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2266 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2281 const Align Alignment =
2283 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2288 if (
C->isAllOnesValue())
2289 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2294 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2300 const Align Alignment =
2309 if (
C->isAllOnesValue())
2310 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2315 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2321 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2322 {Op0, Builder.getInt1(
false)});
2337 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2338 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2339 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2340 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2341 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2344 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2345 LHS = Builder.CreateAnd(
LHS, Mask);
2346 RHS = Builder.CreateAnd(
RHS, Mask);
2363 if (!
C || !
C->isAllOnesValue())
2364 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2369 for (
unsigned i = 0; i != NumElts; ++i)
2371 for (
unsigned i = NumElts; i != 8; ++i)
2372 Indices[i] = NumElts + i % NumElts;
2373 Vec = Builder.CreateShuffleVector(Vec,
2377 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2381 unsigned CC,
bool Signed) {
2389 }
else if (CC == 7) {
2425 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2426 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2428 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2429 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2438 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2444 Name = Name.substr(12);
2449 if (Name.starts_with(
"max.p")) {
2450 if (VecWidth == 128 && EltWidth == 32)
2451 IID = Intrinsic::x86_sse_max_ps;
2452 else if (VecWidth == 128 && EltWidth == 64)
2453 IID = Intrinsic::x86_sse2_max_pd;
2454 else if (VecWidth == 256 && EltWidth == 32)
2455 IID = Intrinsic::x86_avx_max_ps_256;
2456 else if (VecWidth == 256 && EltWidth == 64)
2457 IID = Intrinsic::x86_avx_max_pd_256;
2460 }
else if (Name.starts_with(
"min.p")) {
2461 if (VecWidth == 128 && EltWidth == 32)
2462 IID = Intrinsic::x86_sse_min_ps;
2463 else if (VecWidth == 128 && EltWidth == 64)
2464 IID = Intrinsic::x86_sse2_min_pd;
2465 else if (VecWidth == 256 && EltWidth == 32)
2466 IID = Intrinsic::x86_avx_min_ps_256;
2467 else if (VecWidth == 256 && EltWidth == 64)
2468 IID = Intrinsic::x86_avx_min_pd_256;
2471 }
else if (Name.starts_with(
"pshuf.b.")) {
2472 if (VecWidth == 128)
2473 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2474 else if (VecWidth == 256)
2475 IID = Intrinsic::x86_avx2_pshuf_b;
2476 else if (VecWidth == 512)
2477 IID = Intrinsic::x86_avx512_pshuf_b_512;
2480 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2481 if (VecWidth == 128)
2482 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2483 else if (VecWidth == 256)
2484 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2485 else if (VecWidth == 512)
2486 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2489 }
else if (Name.starts_with(
"pmulh.w.")) {
2490 if (VecWidth == 128)
2491 IID = Intrinsic::x86_sse2_pmulh_w;
2492 else if (VecWidth == 256)
2493 IID = Intrinsic::x86_avx2_pmulh_w;
2494 else if (VecWidth == 512)
2495 IID = Intrinsic::x86_avx512_pmulh_w_512;
2498 }
else if (Name.starts_with(
"pmulhu.w.")) {
2499 if (VecWidth == 128)
2500 IID = Intrinsic::x86_sse2_pmulhu_w;
2501 else if (VecWidth == 256)
2502 IID = Intrinsic::x86_avx2_pmulhu_w;
2503 else if (VecWidth == 512)
2504 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2507 }
else if (Name.starts_with(
"pmaddw.d.")) {
2508 if (VecWidth == 128)
2509 IID = Intrinsic::x86_sse2_pmadd_wd;
2510 else if (VecWidth == 256)
2511 IID = Intrinsic::x86_avx2_pmadd_wd;
2512 else if (VecWidth == 512)
2513 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2516 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2517 if (VecWidth == 128)
2518 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2519 else if (VecWidth == 256)
2520 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2521 else if (VecWidth == 512)
2522 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2525 }
else if (Name.starts_with(
"packsswb.")) {
2526 if (VecWidth == 128)
2527 IID = Intrinsic::x86_sse2_packsswb_128;
2528 else if (VecWidth == 256)
2529 IID = Intrinsic::x86_avx2_packsswb;
2530 else if (VecWidth == 512)
2531 IID = Intrinsic::x86_avx512_packsswb_512;
2534 }
else if (Name.starts_with(
"packssdw.")) {
2535 if (VecWidth == 128)
2536 IID = Intrinsic::x86_sse2_packssdw_128;
2537 else if (VecWidth == 256)
2538 IID = Intrinsic::x86_avx2_packssdw;
2539 else if (VecWidth == 512)
2540 IID = Intrinsic::x86_avx512_packssdw_512;
2543 }
else if (Name.starts_with(
"packuswb.")) {
2544 if (VecWidth == 128)
2545 IID = Intrinsic::x86_sse2_packuswb_128;
2546 else if (VecWidth == 256)
2547 IID = Intrinsic::x86_avx2_packuswb;
2548 else if (VecWidth == 512)
2549 IID = Intrinsic::x86_avx512_packuswb_512;
2552 }
else if (Name.starts_with(
"packusdw.")) {
2553 if (VecWidth == 128)
2554 IID = Intrinsic::x86_sse41_packusdw;
2555 else if (VecWidth == 256)
2556 IID = Intrinsic::x86_avx2_packusdw;
2557 else if (VecWidth == 512)
2558 IID = Intrinsic::x86_avx512_packusdw_512;
2561 }
else if (Name.starts_with(
"vpermilvar.")) {
2562 if (VecWidth == 128 && EltWidth == 32)
2563 IID = Intrinsic::x86_avx_vpermilvar_ps;
2564 else if (VecWidth == 128 && EltWidth == 64)
2565 IID = Intrinsic::x86_avx_vpermilvar_pd;
2566 else if (VecWidth == 256 && EltWidth == 32)
2567 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2568 else if (VecWidth == 256 && EltWidth == 64)
2569 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2570 else if (VecWidth == 512 && EltWidth == 32)
2571 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2572 else if (VecWidth == 512 && EltWidth == 64)
2573 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2576 }
else if (Name ==
"cvtpd2dq.256") {
2577 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2578 }
else if (Name ==
"cvtpd2ps.256") {
2579 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2580 }
else if (Name ==
"cvttpd2dq.256") {
2581 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2582 }
else if (Name ==
"cvttps2dq.128") {
2583 IID = Intrinsic::x86_sse2_cvttps2dq;
2584 }
else if (Name ==
"cvttps2dq.256") {
2585 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2586 }
else if (Name.starts_with(
"permvar.")) {
2588 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2589 IID = Intrinsic::x86_avx2_permps;
2590 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2591 IID = Intrinsic::x86_avx2_permd;
2592 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2593 IID = Intrinsic::x86_avx512_permvar_df_256;
2594 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2595 IID = Intrinsic::x86_avx512_permvar_di_256;
2596 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2597 IID = Intrinsic::x86_avx512_permvar_sf_512;
2598 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2599 IID = Intrinsic::x86_avx512_permvar_si_512;
2600 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2601 IID = Intrinsic::x86_avx512_permvar_df_512;
2602 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2603 IID = Intrinsic::x86_avx512_permvar_di_512;
2604 else if (VecWidth == 128 && EltWidth == 16)
2605 IID = Intrinsic::x86_avx512_permvar_hi_128;
2606 else if (VecWidth == 256 && EltWidth == 16)
2607 IID = Intrinsic::x86_avx512_permvar_hi_256;
2608 else if (VecWidth == 512 && EltWidth == 16)
2609 IID = Intrinsic::x86_avx512_permvar_hi_512;
2610 else if (VecWidth == 128 && EltWidth == 8)
2611 IID = Intrinsic::x86_avx512_permvar_qi_128;
2612 else if (VecWidth == 256 && EltWidth == 8)
2613 IID = Intrinsic::x86_avx512_permvar_qi_256;
2614 else if (VecWidth == 512 && EltWidth == 8)
2615 IID = Intrinsic::x86_avx512_permvar_qi_512;
2618 }
else if (Name.starts_with(
"dbpsadbw.")) {
2619 if (VecWidth == 128)
2620 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2621 else if (VecWidth == 256)
2622 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2623 else if (VecWidth == 512)
2624 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2627 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2628 if (VecWidth == 128)
2629 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2630 else if (VecWidth == 256)
2631 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2632 else if (VecWidth == 512)
2633 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2636 }
else if (Name.starts_with(
"conflict.")) {
2637 if (Name[9] ==
'd' && VecWidth == 128)
2638 IID = Intrinsic::x86_avx512_conflict_d_128;
2639 else if (Name[9] ==
'd' && VecWidth == 256)
2640 IID = Intrinsic::x86_avx512_conflict_d_256;
2641 else if (Name[9] ==
'd' && VecWidth == 512)
2642 IID = Intrinsic::x86_avx512_conflict_d_512;
2643 else if (Name[9] ==
'q' && VecWidth == 128)
2644 IID = Intrinsic::x86_avx512_conflict_q_128;
2645 else if (Name[9] ==
'q' && VecWidth == 256)
2646 IID = Intrinsic::x86_avx512_conflict_q_256;
2647 else if (Name[9] ==
'q' && VecWidth == 512)
2648 IID = Intrinsic::x86_avx512_conflict_q_512;
2651 }
else if (Name.starts_with(
"pavg.")) {
2652 if (Name[5] ==
'b' && VecWidth == 128)
2653 IID = Intrinsic::x86_sse2_pavg_b;
2654 else if (Name[5] ==
'b' && VecWidth == 256)
2655 IID = Intrinsic::x86_avx2_pavg_b;
2656 else if (Name[5] ==
'b' && VecWidth == 512)
2657 IID = Intrinsic::x86_avx512_pavg_b_512;
2658 else if (Name[5] ==
'w' && VecWidth == 128)
2659 IID = Intrinsic::x86_sse2_pavg_w;
2660 else if (Name[5] ==
'w' && VecWidth == 256)
2661 IID = Intrinsic::x86_avx2_pavg_w;
2662 else if (Name[5] ==
'w' && VecWidth == 512)
2663 IID = Intrinsic::x86_avx512_pavg_w_512;
2672 Rep = Builder.CreateIntrinsic(IID, Args);
2683 if (AsmStr->find(
"mov\tfp") == 0 &&
2684 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2685 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2686 AsmStr->replace(Pos, 1,
";");
2692 Value *Rep =
nullptr;
2694 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2696 Value *Neg = Builder.CreateNeg(Arg,
"neg");
2697 Value *Cmp = Builder.CreateICmpSGE(
2699 Rep = Builder.CreateSelect(Cmp, Arg, Neg,
"abs");
2700 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2701 Type *Ty = (Name ==
"abs.bf16")
2705 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2706 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2707 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2708 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2709 : Intrinsic::nvvm_fabs;
2710 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2711 }
else if (Name.consume_front(
"ex2.approx.")) {
2713 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2714 : Intrinsic::nvvm_ex2_approx;
2715 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2716 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2717 Name.starts_with(
"atomic.load.add.f64.p")) {
2722 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2723 Name.starts_with(
"atomic.load.dec.32.p")) {
2728 Rep = Builder.CreateAtomicRMW(
Op, Ptr, Val,
MaybeAlign(),
2730 }
else if (Name ==
"clz.ll") {
2733 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2734 {Arg, Builder.getFalse()},
2736 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2737 }
else if (Name ==
"popc.ll") {
2741 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2742 Arg,
nullptr,
"ctpop");
2743 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2744 }
else if (Name ==
"h2f") {
2746 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2747 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2748 }
else if (Name.consume_front(
"bitcast.") &&
2749 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2752 }
else if (Name ==
"rotate.b32") {
2755 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2756 {Arg, Arg, ShiftAmt});
2757 }
else if (Name ==
"rotate.b64") {
2761 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2762 {Arg, Arg, ZExtShiftAmt});
2763 }
else if (Name ==
"rotate.right.b64") {
2767 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2768 {Arg, Arg, ZExtShiftAmt});
2769 }
else if (Name ==
"swap.lo.hi.b64") {
2772 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2773 {Arg, Arg, Builder.getInt64(32)});
2774 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2777 Name.starts_with(
".to.gen"))) {
2779 }
else if (Name.consume_front(
"ldg.global")) {
2783 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2786 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2788 }
else if (Name ==
"tanh.approx.f32") {
2792 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2794 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2796 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2797 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2799 }
else if (Name ==
"barrier") {
2800 Rep = Builder.CreateIntrinsic(
2801 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2803 }
else if (Name ==
"barrier.sync") {
2804 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2806 }
else if (Name ==
"barrier.sync.cnt") {
2807 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2809 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2810 Name ==
"barrier0.or") {
2812 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2816 .
Case(
"barrier0.popc",
2817 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2818 .
Case(
"barrier0.and",
2819 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2820 .
Case(
"barrier0.or",
2821 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2822 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2823 Rep = Builder.CreateZExt(Bar, CI->
getType());
2827 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2837 ? Builder.CreateBitCast(Arg, NewType)
2840 Rep = Builder.CreateCall(NewFn, Args);
2841 if (
F->getReturnType()->isIntegerTy())
2842 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2852 Value *Rep =
nullptr;
2854 if (Name.starts_with(
"sse4a.movnt.")) {
2866 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2869 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2870 }
else if (Name.starts_with(
"avx.movnt.") ||
2871 Name.starts_with(
"avx512.storent.")) {
2883 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2884 }
else if (Name ==
"sse2.storel.dq") {
2889 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2890 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2891 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2892 }
else if (Name.starts_with(
"sse.storeu.") ||
2893 Name.starts_with(
"sse2.storeu.") ||
2894 Name.starts_with(
"avx.storeu.")) {
2897 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2898 }
else if (Name ==
"avx512.mask.store.ss") {
2902 }
else if (Name.starts_with(
"avx512.mask.store")) {
2904 bool Aligned = Name[17] !=
'u';
2907 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2910 bool CmpEq = Name[9] ==
'e';
2913 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2914 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2921 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2922 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2924 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2925 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2926 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2927 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2928 Name.starts_with(
"sse2.sqrt.p") ||
2929 Name.starts_with(
"sse.sqrt.p")) {
2930 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2931 {CI->getArgOperand(0)});
2932 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2936 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
2937 : Intrinsic::x86_avx512_sqrt_pd_512;
2940 Rep = Builder.CreateIntrinsic(IID, Args);
2942 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2943 {CI->getArgOperand(0)});
2947 }
else if (Name.starts_with(
"avx512.ptestm") ||
2948 Name.starts_with(
"avx512.ptestnm")) {
2952 Rep = Builder.CreateAnd(Op0, Op1);
2958 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2960 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2963 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2966 }
else if (Name.starts_with(
"avx512.kunpck")) {
2971 for (
unsigned i = 0; i != NumElts; ++i)
2980 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
2981 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2982 }
else if (Name ==
"avx512.kand.w") {
2985 Rep = Builder.CreateAnd(
LHS,
RHS);
2986 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2987 }
else if (Name ==
"avx512.kandn.w") {
2990 LHS = Builder.CreateNot(
LHS);
2991 Rep = Builder.CreateAnd(
LHS,
RHS);
2992 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2993 }
else if (Name ==
"avx512.kor.w") {
2996 Rep = Builder.CreateOr(
LHS,
RHS);
2997 Rep = Builder.CreateBitCast(Rep, CI->
getType());
2998 }
else if (Name ==
"avx512.kxor.w") {
3001 Rep = Builder.CreateXor(
LHS,
RHS);
3002 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3003 }
else if (Name ==
"avx512.kxnor.w") {
3006 LHS = Builder.CreateNot(
LHS);
3007 Rep = Builder.CreateXor(
LHS,
RHS);
3008 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3009 }
else if (Name ==
"avx512.knot.w") {
3011 Rep = Builder.CreateNot(Rep);
3012 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3013 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
3016 Rep = Builder.CreateOr(
LHS,
RHS);
3017 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3019 if (Name[14] ==
'c')
3023 Rep = Builder.CreateICmpEQ(Rep,
C);
3024 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3025 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
3026 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
3027 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
3028 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3031 ConstantInt::get(I32Ty, 0));
3033 ConstantInt::get(I32Ty, 0));
3035 if (Name.contains(
".add."))
3036 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3037 else if (Name.contains(
".sub."))
3038 EltOp = Builder.CreateFSub(Elt0, Elt1);
3039 else if (Name.contains(
".mul."))
3040 EltOp = Builder.CreateFMul(Elt0, Elt1);
3042 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3043 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3044 ConstantInt::get(I32Ty, 0));
3045 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3047 bool CmpEq = Name[16] ==
'e';
3049 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3058 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3061 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3064 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3071 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3074 unsigned EltWidth = OpTy->getScalarSizeInBits();
3076 if (VecWidth == 128 && EltWidth == 32)
3077 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3078 else if (VecWidth == 256 && EltWidth == 32)
3079 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3080 else if (VecWidth == 512 && EltWidth == 32)
3081 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3082 else if (VecWidth == 128 && EltWidth == 64)
3083 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3084 else if (VecWidth == 256 && EltWidth == 64)
3085 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3086 else if (VecWidth == 512 && EltWidth == 64)
3087 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3094 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3096 Type *OpTy = Args[0]->getType();
3097 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3098 unsigned EltWidth = OpTy->getScalarSizeInBits();
3100 if (VecWidth == 128 && EltWidth == 32)
3101 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3102 else if (VecWidth == 256 && EltWidth == 32)
3103 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3104 else if (VecWidth == 512 && EltWidth == 32)
3105 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3106 else if (VecWidth == 128 && EltWidth == 64)
3107 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3108 else if (VecWidth == 256 && EltWidth == 64)
3109 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3110 else if (VecWidth == 512 && EltWidth == 64)
3111 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3116 if (VecWidth == 512)
3118 Args.push_back(Mask);
3120 Rep = Builder.CreateIntrinsic(IID, Args);
3121 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3125 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3128 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3129 Name.starts_with(
"avx512.cvtw2mask.") ||
3130 Name.starts_with(
"avx512.cvtd2mask.") ||
3131 Name.starts_with(
"avx512.cvtq2mask.")) {
3136 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3137 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3138 Name.starts_with(
"avx512.mask.pabs")) {
3140 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3141 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3142 Name.starts_with(
"avx512.mask.pmaxs")) {
3144 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3145 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3146 Name.starts_with(
"avx512.mask.pmaxu")) {
3148 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3149 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3150 Name.starts_with(
"avx512.mask.pmins")) {
3152 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3153 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3154 Name.starts_with(
"avx512.mask.pminu")) {
3156 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3157 Name ==
"avx512.pmulu.dq.512" ||
3158 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3160 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3161 Name ==
"avx512.pmul.dq.512" ||
3162 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3164 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3165 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3170 }
else if (Name ==
"avx512.cvtusi2sd") {
3175 }
else if (Name ==
"sse2.cvtss2sd") {
3177 Rep = Builder.CreateFPExt(
3180 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3181 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3182 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3183 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3184 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3185 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3186 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3187 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3188 Name ==
"avx512.mask.cvtqq2ps.256" ||
3189 Name ==
"avx512.mask.cvtqq2ps.512" ||
3190 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3191 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3192 Name ==
"avx.cvt.ps2.pd.256" ||
3193 Name ==
"avx512.mask.cvtps2pd.128" ||
3194 Name ==
"avx512.mask.cvtps2pd.256") {
3199 unsigned NumDstElts = DstTy->getNumElements();
3201 assert(NumDstElts == 2 &&
"Unexpected vector size");
3202 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3205 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3206 bool IsUnsigned = Name.contains(
"cvtu");
3208 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3212 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3213 : Intrinsic::x86_avx512_sitofp_round;
3214 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3217 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3218 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3224 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3225 Name.starts_with(
"vcvtph2ps.")) {
3229 unsigned NumDstElts = DstTy->getNumElements();
3230 if (NumDstElts != SrcTy->getNumElements()) {
3231 assert(NumDstElts == 4 &&
"Unexpected vector size");
3232 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3234 Rep = Builder.CreateBitCast(
3236 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3240 }
else if (Name.starts_with(
"avx512.mask.load")) {
3242 bool Aligned = Name[16] !=
'u';
3245 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3248 ResultTy->getNumElements());
3250 Rep = Builder.CreateIntrinsic(
3251 Intrinsic::masked_expandload, ResultTy,
3253 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3259 Rep = Builder.CreateIntrinsic(
3260 Intrinsic::masked_compressstore, ResultTy,
3262 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3263 Name.starts_with(
"avx512.mask.expand.")) {
3267 ResultTy->getNumElements());
3269 bool IsCompress = Name[12] ==
'c';
3270 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3271 : Intrinsic::x86_avx512_mask_expand;
3272 Rep = Builder.CreateIntrinsic(
3274 }
else if (Name.starts_with(
"xop.vpcom")) {
3276 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3277 Name.ends_with(
"uq"))
3279 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3280 Name.ends_with(
"d") || Name.ends_with(
"q"))
3289 Name = Name.substr(9);
3290 if (Name.starts_with(
"lt"))
3292 else if (Name.starts_with(
"le"))
3294 else if (Name.starts_with(
"gt"))
3296 else if (Name.starts_with(
"ge"))
3298 else if (Name.starts_with(
"eq"))
3300 else if (Name.starts_with(
"ne"))
3302 else if (Name.starts_with(
"false"))
3304 else if (Name.starts_with(
"true"))
3311 }
else if (Name.starts_with(
"xop.vpcmov")) {
3313 Value *NotSel = Builder.CreateNot(Sel);
3316 Rep = Builder.CreateOr(Sel0, Sel1);
3317 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3318 Name.starts_with(
"avx512.mask.prol")) {
3320 }
else if (Name.starts_with(
"avx512.pror") ||
3321 Name.starts_with(
"avx512.mask.pror")) {
3323 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3324 Name.starts_with(
"avx512.mask.vpshld") ||
3325 Name.starts_with(
"avx512.maskz.vpshld")) {
3326 bool ZeroMask = Name[11] ==
'z';
3328 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3329 Name.starts_with(
"avx512.mask.vpshrd") ||
3330 Name.starts_with(
"avx512.maskz.vpshrd")) {
3331 bool ZeroMask = Name[11] ==
'z';
3333 }
else if (Name ==
"sse42.crc32.64.8") {
3336 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3338 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3339 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3340 Name.starts_with(
"avx512.vbroadcast.s")) {
3343 Type *EltTy = VecTy->getElementType();
3344 unsigned EltNum = VecTy->getNumElements();
3348 for (
unsigned I = 0;
I < EltNum; ++
I)
3349 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3350 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3351 Name.starts_with(
"sse41.pmovzx") ||
3352 Name.starts_with(
"avx2.pmovsx") ||
3353 Name.starts_with(
"avx2.pmovzx") ||
3354 Name.starts_with(
"avx512.mask.pmovsx") ||
3355 Name.starts_with(
"avx512.mask.pmovzx")) {
3357 unsigned NumDstElts = DstTy->getNumElements();
3361 for (
unsigned i = 0; i != NumDstElts; ++i)
3366 bool DoSext = Name.contains(
"pmovsx");
3368 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3373 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3374 Name ==
"avx512.mask.pmov.qd.512" ||
3375 Name ==
"avx512.mask.pmov.wb.256" ||
3376 Name ==
"avx512.mask.pmov.wb.512") {
3381 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3382 Name ==
"avx2.vbroadcasti128") {
3388 if (NumSrcElts == 2)
3389 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3391 Rep = Builder.CreateShuffleVector(Load,
3393 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3394 Name.starts_with(
"avx512.mask.shuf.f")) {
3399 unsigned ControlBitsMask = NumLanes - 1;
3400 unsigned NumControlBits = NumLanes / 2;
3403 for (
unsigned l = 0; l != NumLanes; ++l) {
3404 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3406 if (l >= NumLanes / 2)
3407 LaneMask += NumLanes;
3408 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3409 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3415 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3416 Name.starts_with(
"avx512.mask.broadcasti")) {
3419 unsigned NumDstElts =
3423 for (
unsigned i = 0; i != NumDstElts; ++i)
3424 ShuffleMask[i] = i % NumSrcElts;
3430 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3431 Name.starts_with(
"avx2.vbroadcast") ||
3432 Name.starts_with(
"avx512.pbroadcast") ||
3433 Name.starts_with(
"avx512.mask.broadcast.s")) {
3440 Rep = Builder.CreateShuffleVector(
Op, M);
3445 }
else if (Name.starts_with(
"sse2.padds.") ||
3446 Name.starts_with(
"avx2.padds.") ||
3447 Name.starts_with(
"avx512.padds.") ||
3448 Name.starts_with(
"avx512.mask.padds.")) {
3450 }
else if (Name.starts_with(
"sse2.psubs.") ||
3451 Name.starts_with(
"avx2.psubs.") ||
3452 Name.starts_with(
"avx512.psubs.") ||
3453 Name.starts_with(
"avx512.mask.psubs.")) {
3455 }
else if (Name.starts_with(
"sse2.paddus.") ||
3456 Name.starts_with(
"avx2.paddus.") ||
3457 Name.starts_with(
"avx512.mask.paddus.")) {
3459 }
else if (Name.starts_with(
"sse2.psubus.") ||
3460 Name.starts_with(
"avx2.psubus.") ||
3461 Name.starts_with(
"avx512.mask.psubus.")) {
3463 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3468 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3472 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3477 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3482 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3483 Name ==
"avx512.psll.dq.512") {
3487 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3488 Name ==
"avx512.psrl.dq.512") {
3492 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3493 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3494 Name.starts_with(
"avx2.pblendd.")) {
3499 unsigned NumElts = VecTy->getNumElements();
3502 for (
unsigned i = 0; i != NumElts; ++i)
3503 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3505 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3506 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3507 Name ==
"avx2.vinserti128" ||
3508 Name.starts_with(
"avx512.mask.insert")) {
3512 unsigned DstNumElts =
3514 unsigned SrcNumElts =
3516 unsigned Scale = DstNumElts / SrcNumElts;
3523 for (
unsigned i = 0; i != SrcNumElts; ++i)
3525 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3526 Idxs[i] = SrcNumElts;
3527 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3541 for (
unsigned i = 0; i != DstNumElts; ++i)
3544 for (
unsigned i = 0; i != SrcNumElts; ++i)
3545 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3546 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3552 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3553 Name ==
"avx2.vextracti128" ||
3554 Name.starts_with(
"avx512.mask.vextract")) {
3557 unsigned DstNumElts =
3559 unsigned SrcNumElts =
3561 unsigned Scale = SrcNumElts / DstNumElts;
3568 for (
unsigned i = 0; i != DstNumElts; ++i) {
3569 Idxs[i] = i + (Imm * DstNumElts);
3571 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3577 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3578 Name.starts_with(
"avx512.mask.perm.di.")) {
3582 unsigned NumElts = VecTy->getNumElements();
3585 for (
unsigned i = 0; i != NumElts; ++i)
3586 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3588 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3593 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3605 unsigned HalfSize = NumElts / 2;
3617 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3618 for (
unsigned i = 0; i < HalfSize; ++i)
3619 ShuffleMask[i] = StartIndex + i;
3622 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3623 for (
unsigned i = 0; i < HalfSize; ++i)
3624 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3626 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3628 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3629 Name.starts_with(
"avx512.mask.vpermil.p") ||
3630 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3634 unsigned NumElts = VecTy->getNumElements();
3636 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3637 unsigned IdxMask = ((1 << IdxSize) - 1);
3643 for (
unsigned i = 0; i != NumElts; ++i)
3644 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3646 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3651 }
else if (Name ==
"sse2.pshufl.w" ||
3652 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3657 if (Name ==
"sse2.pshufl.w" && NumElts % 8 != 0)
3661 for (
unsigned l = 0; l != NumElts; l += 8) {
3662 for (
unsigned i = 0; i != 4; ++i)
3663 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3664 for (
unsigned i = 4; i != 8; ++i)
3665 Idxs[i + l] = i + l;
3668 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3673 }
else if (Name ==
"sse2.pshufh.w" ||
3674 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3679 if (Name ==
"sse2.pshufh.w" && NumElts % 8 != 0)
3683 for (
unsigned l = 0; l != NumElts; l += 8) {
3684 for (
unsigned i = 0; i != 4; ++i)
3685 Idxs[i + l] = i + l;
3686 for (
unsigned i = 0; i != 4; ++i)
3687 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3690 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3695 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3702 unsigned HalfLaneElts = NumLaneElts / 2;
3705 for (
unsigned i = 0; i != NumElts; ++i) {
3707 Idxs[i] = i - (i % NumLaneElts);
3709 if ((i % NumLaneElts) >= HalfLaneElts)
3713 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3716 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3720 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3721 Name.starts_with(
"avx512.mask.movshdup") ||
3722 Name.starts_with(
"avx512.mask.movsldup")) {
3728 if (Name.starts_with(
"avx512.mask.movshdup."))
3732 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3733 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3734 Idxs[i + l + 0] = i + l +
Offset;
3735 Idxs[i + l + 1] = i + l +
Offset;
3738 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3742 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3743 Name.starts_with(
"avx512.mask.unpckl.")) {
3750 for (
int l = 0; l != NumElts; l += NumLaneElts)
3751 for (
int i = 0; i != NumLaneElts; ++i)
3752 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3754 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3758 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3759 Name.starts_with(
"avx512.mask.unpckh.")) {
3766 for (
int l = 0; l != NumElts; l += NumLaneElts)
3767 for (
int i = 0; i != NumLaneElts; ++i)
3768 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3770 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3774 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3775 Name.starts_with(
"avx512.mask.pand.")) {
3778 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3780 Rep = Builder.CreateBitCast(Rep, FTy);
3783 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3784 Name.starts_with(
"avx512.mask.pandn.")) {
3787 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3788 Rep = Builder.CreateAnd(Rep,
3790 Rep = Builder.CreateBitCast(Rep, FTy);
3793 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3794 Name.starts_with(
"avx512.mask.por.")) {
3797 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3799 Rep = Builder.CreateBitCast(Rep, FTy);
3802 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3803 Name.starts_with(
"avx512.mask.pxor.")) {
3806 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3808 Rep = Builder.CreateBitCast(Rep, FTy);
3811 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3815 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3819 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3823 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3824 if (Name.ends_with(
".512")) {
3826 if (Name[17] ==
's')
3827 IID = Intrinsic::x86_avx512_add_ps_512;
3829 IID = Intrinsic::x86_avx512_add_pd_512;
3831 Rep = Builder.CreateIntrinsic(
3839 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3840 if (Name.ends_with(
".512")) {
3842 if (Name[17] ==
's')
3843 IID = Intrinsic::x86_avx512_div_ps_512;
3845 IID = Intrinsic::x86_avx512_div_pd_512;
3847 Rep = Builder.CreateIntrinsic(
3855 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3856 if (Name.ends_with(
".512")) {
3858 if (Name[17] ==
's')
3859 IID = Intrinsic::x86_avx512_mul_ps_512;
3861 IID = Intrinsic::x86_avx512_mul_pd_512;
3863 Rep = Builder.CreateIntrinsic(
3871 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3872 if (Name.ends_with(
".512")) {
3874 if (Name[17] ==
's')
3875 IID = Intrinsic::x86_avx512_sub_ps_512;
3877 IID = Intrinsic::x86_avx512_sub_pd_512;
3879 Rep = Builder.CreateIntrinsic(
3887 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3888 Name.starts_with(
"avx512.mask.min.p")) &&
3889 Name.drop_front(18) ==
".512") {
3890 bool IsDouble = Name[17] ==
'd';
3891 bool IsMin = Name[13] ==
'i';
3893 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3894 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3897 Rep = Builder.CreateIntrinsic(
3902 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3904 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3905 {CI->getArgOperand(0), Builder.getInt1(false)});
3908 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3909 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3910 bool IsVariable = Name[16] ==
'v';
3911 char Size = Name[16] ==
'.' ? Name[17]
3912 : Name[17] ==
'.' ? Name[18]
3913 : Name[18] ==
'.' ? Name[19]
3917 if (IsVariable && Name[17] !=
'.') {
3918 if (
Size ==
'd' && Name[17] ==
'2')
3919 IID = Intrinsic::x86_avx2_psllv_q;
3920 else if (
Size ==
'd' && Name[17] ==
'4')
3921 IID = Intrinsic::x86_avx2_psllv_q_256;
3922 else if (
Size ==
's' && Name[17] ==
'4')
3923 IID = Intrinsic::x86_avx2_psllv_d;
3924 else if (
Size ==
's' && Name[17] ==
'8')
3925 IID = Intrinsic::x86_avx2_psllv_d_256;
3926 else if (
Size ==
'h' && Name[17] ==
'8')
3927 IID = Intrinsic::x86_avx512_psllv_w_128;
3928 else if (
Size ==
'h' && Name[17] ==
'1')
3929 IID = Intrinsic::x86_avx512_psllv_w_256;
3930 else if (Name[17] ==
'3' && Name[18] ==
'2')
3931 IID = Intrinsic::x86_avx512_psllv_w_512;
3934 }
else if (Name.ends_with(
".128")) {
3936 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3937 : Intrinsic::x86_sse2_psll_d;
3938 else if (
Size ==
'q')
3939 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3940 : Intrinsic::x86_sse2_psll_q;
3941 else if (
Size ==
'w')
3942 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3943 : Intrinsic::x86_sse2_psll_w;
3946 }
else if (Name.ends_with(
".256")) {
3948 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3949 : Intrinsic::x86_avx2_psll_d;
3950 else if (
Size ==
'q')
3951 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3952 : Intrinsic::x86_avx2_psll_q;
3953 else if (
Size ==
'w')
3954 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3955 : Intrinsic::x86_avx2_psll_w;
3960 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3961 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3962 : Intrinsic::x86_avx512_psll_d_512;
3963 else if (
Size ==
'q')
3964 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3965 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3966 : Intrinsic::x86_avx512_psll_q_512;
3967 else if (
Size ==
'w')
3968 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3969 : Intrinsic::x86_avx512_psll_w_512;
3975 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
3976 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3977 bool IsVariable = Name[16] ==
'v';
3978 char Size = Name[16] ==
'.' ? Name[17]
3979 : Name[17] ==
'.' ? Name[18]
3980 : Name[18] ==
'.' ? Name[19]
3984 if (IsVariable && Name[17] !=
'.') {
3985 if (
Size ==
'd' && Name[17] ==
'2')
3986 IID = Intrinsic::x86_avx2_psrlv_q;
3987 else if (
Size ==
'd' && Name[17] ==
'4')
3988 IID = Intrinsic::x86_avx2_psrlv_q_256;
3989 else if (
Size ==
's' && Name[17] ==
'4')
3990 IID = Intrinsic::x86_avx2_psrlv_d;
3991 else if (
Size ==
's' && Name[17] ==
'8')
3992 IID = Intrinsic::x86_avx2_psrlv_d_256;
3993 else if (
Size ==
'h' && Name[17] ==
'8')
3994 IID = Intrinsic::x86_avx512_psrlv_w_128;
3995 else if (
Size ==
'h' && Name[17] ==
'1')
3996 IID = Intrinsic::x86_avx512_psrlv_w_256;
3997 else if (Name[17] ==
'3' && Name[18] ==
'2')
3998 IID = Intrinsic::x86_avx512_psrlv_w_512;
4001 }
else if (Name.ends_with(
".128")) {
4003 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4004 : Intrinsic::x86_sse2_psrl_d;
4005 else if (
Size ==
'q')
4006 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4007 : Intrinsic::x86_sse2_psrl_q;
4008 else if (
Size ==
'w')
4009 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4010 : Intrinsic::x86_sse2_psrl_w;
4013 }
else if (Name.ends_with(
".256")) {
4015 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4016 : Intrinsic::x86_avx2_psrl_d;
4017 else if (
Size ==
'q')
4018 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4019 : Intrinsic::x86_avx2_psrl_q;
4020 else if (
Size ==
'w')
4021 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4022 : Intrinsic::x86_avx2_psrl_w;
4027 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4028 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4029 : Intrinsic::x86_avx512_psrl_d_512;
4030 else if (
Size ==
'q')
4031 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4032 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4033 : Intrinsic::x86_avx512_psrl_q_512;
4034 else if (
Size ==
'w')
4035 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4036 : Intrinsic::x86_avx512_psrl_w_512;
4042 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4043 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4044 bool IsVariable = Name[16] ==
'v';
4045 char Size = Name[16] ==
'.' ? Name[17]
4046 : Name[17] ==
'.' ? Name[18]
4047 : Name[18] ==
'.' ? Name[19]
4051 if (IsVariable && Name[17] !=
'.') {
4052 if (
Size ==
's' && Name[17] ==
'4')
4053 IID = Intrinsic::x86_avx2_psrav_d;
4054 else if (
Size ==
's' && Name[17] ==
'8')
4055 IID = Intrinsic::x86_avx2_psrav_d_256;
4056 else if (
Size ==
'h' && Name[17] ==
'8')
4057 IID = Intrinsic::x86_avx512_psrav_w_128;
4058 else if (
Size ==
'h' && Name[17] ==
'1')
4059 IID = Intrinsic::x86_avx512_psrav_w_256;
4060 else if (Name[17] ==
'3' && Name[18] ==
'2')
4061 IID = Intrinsic::x86_avx512_psrav_w_512;
4064 }
else if (Name.ends_with(
".128")) {
4066 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4067 : Intrinsic::x86_sse2_psra_d;
4068 else if (
Size ==
'q')
4069 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4070 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4071 : Intrinsic::x86_avx512_psra_q_128;
4072 else if (
Size ==
'w')
4073 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4074 : Intrinsic::x86_sse2_psra_w;
4077 }
else if (Name.ends_with(
".256")) {
4079 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4080 : Intrinsic::x86_avx2_psra_d;
4081 else if (
Size ==
'q')
4082 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4083 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4084 : Intrinsic::x86_avx512_psra_q_256;
4085 else if (
Size ==
'w')
4086 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4087 : Intrinsic::x86_avx2_psra_w;
4092 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4093 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4094 : Intrinsic::x86_avx512_psra_d_512;
4095 else if (
Size ==
'q')
4096 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4097 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4098 : Intrinsic::x86_avx512_psra_q_512;
4099 else if (
Size ==
'w')
4100 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4101 : Intrinsic::x86_avx512_psra_w_512;
4107 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4109 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4111 }
else if (Name.ends_with(
".movntdqa")) {
4115 LoadInst *LI = Builder.CreateAlignedLoad(
4120 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4121 Name.starts_with(
"fma.vfmsub.") ||
4122 Name.starts_with(
"fma.vfnmadd.") ||
4123 Name.starts_with(
"fma.vfnmsub.")) {
4124 bool NegMul = Name[6] ==
'n';
4125 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4126 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4137 if (NegMul && !IsScalar)
4138 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4139 if (NegMul && IsScalar)
4140 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4142 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4144 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4148 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4156 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4160 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4161 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4162 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4163 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4164 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4165 bool IsMask3 = Name[11] ==
'3';
4166 bool IsMaskZ = Name[11] ==
'z';
4168 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4169 bool NegMul = Name[2] ==
'n';
4170 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4176 if (NegMul && (IsMask3 || IsMaskZ))
4177 A = Builder.CreateFNeg(
A);
4178 if (NegMul && !(IsMask3 || IsMaskZ))
4179 B = Builder.CreateFNeg(
B);
4181 C = Builder.CreateFNeg(
C);
4183 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4184 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4185 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4192 if (Name.back() ==
'd')
4193 IID = Intrinsic::x86_avx512_vfmadd_f64;
4195 IID = Intrinsic::x86_avx512_vfmadd_f32;
4196 Rep = Builder.CreateIntrinsic(IID,
Ops);
4198 Rep = Builder.CreateFMA(
A,
B,
C);
4207 if (NegAcc && IsMask3)
4212 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4214 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4215 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4216 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4217 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4218 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4219 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4220 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4221 bool IsMask3 = Name[11] ==
'3';
4222 bool IsMaskZ = Name[11] ==
'z';
4224 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4225 bool NegMul = Name[2] ==
'n';
4226 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4232 if (NegMul && (IsMask3 || IsMaskZ))
4233 A = Builder.CreateFNeg(
A);
4234 if (NegMul && !(IsMask3 || IsMaskZ))
4235 B = Builder.CreateFNeg(
B);
4237 C = Builder.CreateFNeg(
C);
4244 if (Name[Name.size() - 5] ==
's')
4245 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4247 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4251 Rep = Builder.CreateFMA(
A,
B,
C);
4259 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4263 if (VecWidth == 128 && EltWidth == 32)
4264 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4265 else if (VecWidth == 256 && EltWidth == 32)
4266 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4267 else if (VecWidth == 128 && EltWidth == 64)
4268 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4269 else if (VecWidth == 256 && EltWidth == 64)
4270 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4276 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4277 Rep = Builder.CreateIntrinsic(IID,
Ops);
4278 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4279 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4280 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4281 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4282 bool IsMask3 = Name[11] ==
'3';
4283 bool IsMaskZ = Name[11] ==
'z';
4285 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4286 bool IsSubAdd = Name[3] ==
's';
4290 if (Name[Name.size() - 5] ==
's')
4291 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4293 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4298 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4300 Rep = Builder.CreateIntrinsic(IID,
Ops);
4309 Value *Odd = Builder.CreateCall(FMA,
Ops);
4310 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4311 Value *Even = Builder.CreateCall(FMA,
Ops);
4317 for (
int i = 0; i != NumElts; ++i)
4318 Idxs[i] = i + (i % 2) * NumElts;
4320 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4328 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4329 Name.starts_with(
"avx512.maskz.pternlog.")) {
4330 bool ZeroMask = Name[11] ==
'z';
4334 if (VecWidth == 128 && EltWidth == 32)
4335 IID = Intrinsic::x86_avx512_pternlog_d_128;
4336 else if (VecWidth == 256 && EltWidth == 32)
4337 IID = Intrinsic::x86_avx512_pternlog_d_256;
4338 else if (VecWidth == 512 && EltWidth == 32)
4339 IID = Intrinsic::x86_avx512_pternlog_d_512;
4340 else if (VecWidth == 128 && EltWidth == 64)
4341 IID = Intrinsic::x86_avx512_pternlog_q_128;
4342 else if (VecWidth == 256 && EltWidth == 64)
4343 IID = Intrinsic::x86_avx512_pternlog_q_256;
4344 else if (VecWidth == 512 && EltWidth == 64)
4345 IID = Intrinsic::x86_avx512_pternlog_q_512;
4351 Rep = Builder.CreateIntrinsic(IID, Args);
4355 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4356 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4357 bool ZeroMask = Name[11] ==
'z';
4358 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4361 if (VecWidth == 128 && !
High)
4362 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4363 else if (VecWidth == 256 && !
High)
4364 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4365 else if (VecWidth == 512 && !
High)
4366 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4367 else if (VecWidth == 128 &&
High)
4368 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4369 else if (VecWidth == 256 &&
High)
4370 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4371 else if (VecWidth == 512 &&
High)
4372 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4378 Rep = Builder.CreateIntrinsic(IID, Args);
4382 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4383 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4384 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4385 bool ZeroMask = Name[11] ==
'z';
4386 bool IndexForm = Name[17] ==
'i';
4388 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4389 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4390 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4391 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4392 bool ZeroMask = Name[11] ==
'z';
4393 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4396 if (VecWidth == 128 && !IsSaturating)
4397 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4398 else if (VecWidth == 256 && !IsSaturating)
4399 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4400 else if (VecWidth == 512 && !IsSaturating)
4401 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4402 else if (VecWidth == 128 && IsSaturating)
4403 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4404 else if (VecWidth == 256 && IsSaturating)
4405 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4406 else if (VecWidth == 512 && IsSaturating)
4407 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4417 if (Args[1]->
getType()->isVectorTy() &&
4420 ->isIntegerTy(32) &&
4421 Args[2]->
getType()->isVectorTy() &&
4424 ->isIntegerTy(32)) {
4425 Type *NewArgType =
nullptr;
4426 if (VecWidth == 128)
4428 else if (VecWidth == 256)
4430 else if (VecWidth == 512)
4436 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4437 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4440 Rep = Builder.CreateIntrinsic(IID, Args);
4444 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4445 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4446 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4447 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4448 bool ZeroMask = Name[11] ==
'z';
4449 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4452 if (VecWidth == 128 && !IsSaturating)
4453 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4454 else if (VecWidth == 256 && !IsSaturating)
4455 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4456 else if (VecWidth == 512 && !IsSaturating)
4457 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4458 else if (VecWidth == 128 && IsSaturating)
4459 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4460 else if (VecWidth == 256 && IsSaturating)
4461 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4462 else if (VecWidth == 512 && IsSaturating)
4463 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4473 if (Args[1]->
getType()->isVectorTy() &&
4476 ->isIntegerTy(32) &&
4477 Args[2]->
getType()->isVectorTy() &&
4480 ->isIntegerTy(32)) {
4481 Type *NewArgType =
nullptr;
4482 if (VecWidth == 128)
4484 else if (VecWidth == 256)
4486 else if (VecWidth == 512)
4492 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4493 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4496 Rep = Builder.CreateIntrinsic(IID, Args);
4500 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4501 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4502 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4504 if (Name[0] ==
'a' && Name.back() ==
'2')
4505 IID = Intrinsic::x86_addcarry_32;
4506 else if (Name[0] ==
'a' && Name.back() ==
'4')
4507 IID = Intrinsic::x86_addcarry_64;
4508 else if (Name[0] ==
's' && Name.back() ==
'2')
4509 IID = Intrinsic::x86_subborrow_32;
4510 else if (Name[0] ==
's' && Name.back() ==
'4')
4511 IID = Intrinsic::x86_subborrow_64;
4518 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4521 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4524 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4528 }
else if (Name.starts_with(
"avx512.mask.") &&
4539 if (Name.starts_with(
"neon.bfcvt")) {
4540 if (Name.starts_with(
"neon.bfcvtn2")) {
4542 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4544 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4545 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4548 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4549 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4551 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4555 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4556 return Builder.CreateShuffleVector(
4559 return Builder.CreateFPTrunc(CI->
getOperand(0),
4562 }
else if (Name.starts_with(
"sve.fcvt")) {
4565 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4566 .
Case(
"sve.fcvtnt.bf16f32",
4567 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4579 if (Args[1]->
getType() != BadPredTy)
4582 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4583 BadPredTy, Args[1]);
4584 Args[1] = Builder.CreateIntrinsic(
4585 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4587 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4596 if (Name ==
"mve.vctp64.old") {
4599 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4602 Value *C1 = Builder.CreateIntrinsic(
4603 Intrinsic::arm_mve_pred_v2i,
4605 return Builder.CreateIntrinsic(
4606 Intrinsic::arm_mve_pred_i2v,
4608 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4609 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4610 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4611 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4613 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4614 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4615 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4616 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4618 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4619 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4620 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4621 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4622 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4623 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4624 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4625 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4626 std::vector<Type *> Tys;
4630 case Intrinsic::arm_mve_mull_int_predicated:
4631 case Intrinsic::arm_mve_vqdmull_predicated:
4632 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4635 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4636 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4637 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4641 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4645 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4649 case Intrinsic::arm_cde_vcx1q_predicated:
4650 case Intrinsic::arm_cde_vcx1qa_predicated:
4651 case Intrinsic::arm_cde_vcx2q_predicated:
4652 case Intrinsic::arm_cde_vcx2qa_predicated:
4653 case Intrinsic::arm_cde_vcx3q_predicated:
4654 case Intrinsic::arm_cde_vcx3qa_predicated:
4661 std::vector<Value *>
Ops;
4663 Type *Ty =
Op->getType();
4664 if (Ty->getScalarSizeInBits() == 1) {
4665 Value *C1 = Builder.CreateIntrinsic(
4666 Intrinsic::arm_mve_pred_v2i,
4668 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4673 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4688 auto UpgradeLegacyWMMAIUIntrinsicCall =
4693 Args.push_back(Builder.getFalse());
4697 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4704 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4709 NewCall->copyMetadata(*CI);
4713 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4714 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4715 "intrinsic should have 7 arguments");
4718 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4720 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4721 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4722 "intrinsic should have 8 arguments");
4727 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4730 switch (
F->getIntrinsicID()) {
4733 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4734 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4735 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4736 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4737 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4738 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4753 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4756 F->getParent(),
F->getIntrinsicID(), Overloads);
4761 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4766 NewCall->copyMetadata(*CI);
4767 NewCall->takeName(CI);
4789 if (NumOperands < 3)
4802 bool IsVolatile =
false;
4806 if (NumOperands > 3)
4811 if (NumOperands > 5) {
4813 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4827 if (VT->getElementType()->isIntegerTy(16)) {
4830 Val = Builder.CreateBitCast(Val, AsBF16);
4838 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4840 unsigned AddrSpace = PtrTy->getAddressSpace();
4843 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4845 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4850 MDNode *RangeNotPrivate =
4853 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4859 return Builder.CreateBitCast(RMW, RetTy);
4880 return MAV->getMetadata();
4887 return I->getDebugLoc().getAsMDNode();
4895 if (Name ==
"label") {
4898 }
else if (Name ==
"assign") {
4905 }
else if (Name ==
"declare") {
4910 }
else if (Name ==
"addr") {
4920 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr,
4922 }
else if (Name ==
"value") {
4925 unsigned ExprOp = 2;
4939 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
4947 int64_t OffsetVal =
Offset->getSExtValue();
4948 return Builder.CreateIntrinsic(OffsetVal >= 0
4949 ? Intrinsic::vector_splice_left
4950 : Intrinsic::vector_splice_right,
4952 {CI->getArgOperand(0), CI->getArgOperand(1),
4953 Builder.getInt32(std::abs(OffsetVal))});
4958 if (Name.starts_with(
"to.fp16")) {
4960 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
4961 return Builder.CreateBitCast(Cast, CI->
getType());
4964 if (Name.starts_with(
"from.fp16")) {
4966 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
4967 return Builder.CreateFPExt(Cast, CI->
getType());
4992 if (!Name.consume_front(
"llvm."))
4995 bool IsX86 = Name.consume_front(
"x86.");
4996 bool IsNVVM = Name.consume_front(
"nvvm.");
4997 bool IsAArch64 = Name.consume_front(
"aarch64.");
4998 bool IsARM = Name.consume_front(
"arm.");
4999 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
5000 bool IsDbg = Name.consume_front(
"dbg.");
5002 (Name.consume_front(
"experimental.vector.splice") ||
5003 Name.consume_front(
"vector.splice")) &&
5004 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
5005 Value *Rep =
nullptr;
5007 if (!IsX86 && Name ==
"stackprotectorcheck") {
5009 }
else if (IsNVVM) {
5013 }
else if (IsAArch64) {
5017 }
else if (IsAMDGCN) {
5021 }
else if (IsOldSplice) {
5023 }
else if (Name.consume_front(
"convert.")) {
5035 const auto &DefaultCase = [&]() ->
void {
5043 "Unknown function for CallBase upgrade and isn't just a name change");
5051 "Return type must have changed");
5052 assert(OldST->getNumElements() ==
5054 "Must have same number of elements");
5057 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5060 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5061 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5062 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5081 case Intrinsic::arm_neon_vst1:
5082 case Intrinsic::arm_neon_vst2:
5083 case Intrinsic::arm_neon_vst3:
5084 case Intrinsic::arm_neon_vst4:
5085 case Intrinsic::arm_neon_vst2lane:
5086 case Intrinsic::arm_neon_vst3lane:
5087 case Intrinsic::arm_neon_vst4lane: {
5089 NewCall = Builder.CreateCall(NewFn, Args);
5092 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5093 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5094 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5099 NewCall = Builder.CreateCall(NewFn, Args);
5102 case Intrinsic::aarch64_sve_ld3_sret:
5103 case Intrinsic::aarch64_sve_ld4_sret:
5104 case Intrinsic::aarch64_sve_ld2_sret: {
5106 Name = Name.substr(5);
5113 unsigned MinElts = RetTy->getMinNumElements() /
N;
5115 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5117 for (
unsigned I = 0;
I <
N;
I++) {
5118 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5119 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5125 case Intrinsic::coro_end: {
5128 NewCall = Builder.CreateCall(NewFn, Args);
5132 case Intrinsic::vector_extract: {
5134 Name = Name.substr(5);
5135 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5140 unsigned MinElts = RetTy->getMinNumElements();
5143 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5147 case Intrinsic::vector_insert: {
5149 Name = Name.substr(5);
5150 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5154 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5159 NewCall = Builder.CreateCall(
5163 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5169 assert(
N > 1 &&
"Create is expected to be between 2-4");
5172 unsigned MinElts = RetTy->getMinNumElements() /
N;
5173 for (
unsigned I = 0;
I <
N;
I++) {
5175 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5182 case Intrinsic::arm_neon_bfdot:
5183 case Intrinsic::arm_neon_bfmmla:
5184 case Intrinsic::arm_neon_bfmlalb:
5185 case Intrinsic::arm_neon_bfmlalt:
5186 case Intrinsic::aarch64_neon_bfdot:
5187 case Intrinsic::aarch64_neon_bfmmla:
5188 case Intrinsic::aarch64_neon_bfmlalb:
5189 case Intrinsic::aarch64_neon_bfmlalt: {
5192 "Mismatch between function args and call args");
5193 size_t OperandWidth =
5195 assert((OperandWidth == 64 || OperandWidth == 128) &&
5196 "Unexpected operand width");
5198 auto Iter = CI->
args().begin();
5199 Args.push_back(*Iter++);
5200 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5201 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5202 NewCall = Builder.CreateCall(NewFn, Args);
5206 case Intrinsic::bitreverse:
5207 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5210 case Intrinsic::ctlz:
5211 case Intrinsic::cttz: {
5218 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5222 case Intrinsic::objectsize: {
5223 Value *NullIsUnknownSize =
5227 NewCall = Builder.CreateCall(
5232 case Intrinsic::ctpop:
5233 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5235 case Intrinsic::dbg_value: {
5237 Name = Name.substr(5);
5239 if (Name.starts_with(
"dbg.addr")) {
5253 if (
Offset->isNullValue()) {
5254 NewCall = Builder.CreateCall(
5263 case Intrinsic::ptr_annotation:
5271 NewCall = Builder.CreateCall(
5280 case Intrinsic::var_annotation:
5287 NewCall = Builder.CreateCall(
5296 case Intrinsic::riscv_aes32dsi:
5297 case Intrinsic::riscv_aes32dsmi:
5298 case Intrinsic::riscv_aes32esi:
5299 case Intrinsic::riscv_aes32esmi:
5300 case Intrinsic::riscv_sm4ks:
5301 case Intrinsic::riscv_sm4ed: {
5311 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5312 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5318 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5319 Value *Res = NewCall;
5321 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5327 case Intrinsic::nvvm_mapa_shared_cluster: {
5331 Value *Res = NewCall;
5332 Res = Builder.CreateAddrSpaceCast(
5339 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5340 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5343 Args[0] = Builder.CreateAddrSpaceCast(
5346 NewCall = Builder.CreateCall(NewFn, Args);
5352 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5353 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5354 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5355 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5356 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5357 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5358 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5359 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5366 Args[0] = Builder.CreateAddrSpaceCast(
5375 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5377 NewCall = Builder.CreateCall(NewFn, Args);
5383 case Intrinsic::riscv_sha256sig0:
5384 case Intrinsic::riscv_sha256sig1:
5385 case Intrinsic::riscv_sha256sum0:
5386 case Intrinsic::riscv_sha256sum1:
5387 case Intrinsic::riscv_sm3p0:
5388 case Intrinsic::riscv_sm3p1: {
5395 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5397 NewCall = Builder.CreateCall(NewFn, Arg);
5399 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5406 case Intrinsic::x86_xop_vfrcz_ss:
5407 case Intrinsic::x86_xop_vfrcz_sd:
5408 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5411 case Intrinsic::x86_xop_vpermil2pd:
5412 case Intrinsic::x86_xop_vpermil2ps:
5413 case Intrinsic::x86_xop_vpermil2pd_256:
5414 case Intrinsic::x86_xop_vpermil2ps_256: {
5418 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5419 NewCall = Builder.CreateCall(NewFn, Args);
5423 case Intrinsic::x86_sse41_ptestc:
5424 case Intrinsic::x86_sse41_ptestz:
5425 case Intrinsic::x86_sse41_ptestnzc: {
5439 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5440 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5442 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5446 case Intrinsic::x86_rdtscp: {
5452 NewCall = Builder.CreateCall(NewFn);
5454 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5457 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5465 case Intrinsic::x86_sse41_insertps:
5466 case Intrinsic::x86_sse41_dppd:
5467 case Intrinsic::x86_sse41_dpps:
5468 case Intrinsic::x86_sse41_mpsadbw:
5469 case Intrinsic::x86_avx_dp_ps_256:
5470 case Intrinsic::x86_avx2_mpsadbw: {
5476 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5477 NewCall = Builder.CreateCall(NewFn, Args);
5481 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5482 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5483 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5484 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5485 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5486 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5492 NewCall = Builder.CreateCall(NewFn, Args);
5501 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5502 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5503 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5504 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5505 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5506 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5510 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5511 Args[1] = Builder.CreateBitCast(
5514 NewCall = Builder.CreateCall(NewFn, Args);
5515 Value *Res = Builder.CreateBitCast(
5523 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5524 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5525 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5529 Args[1] = Builder.CreateBitCast(
5531 Args[2] = Builder.CreateBitCast(
5534 NewCall = Builder.CreateCall(NewFn, Args);
5538 case Intrinsic::thread_pointer: {
5539 NewCall = Builder.CreateCall(NewFn, {});
5543 case Intrinsic::memcpy:
5544 case Intrinsic::memmove:
5545 case Intrinsic::memset: {
5561 NewCall = Builder.CreateCall(NewFn, Args);
5563 AttributeList NewAttrs = AttributeList::get(
5564 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5565 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5566 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5571 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5574 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5578 case Intrinsic::masked_load:
5579 case Intrinsic::masked_gather:
5580 case Intrinsic::masked_store:
5581 case Intrinsic::masked_scatter: {
5587 auto GetMaybeAlign = [](
Value *
Op) {
5597 auto GetAlign = [&](
Value *
Op) {
5606 case Intrinsic::masked_load:
5607 NewCall = Builder.CreateMaskedLoad(
5611 case Intrinsic::masked_gather:
5612 NewCall = Builder.CreateMaskedGather(
5618 case Intrinsic::masked_store:
5619 NewCall = Builder.CreateMaskedStore(
5623 case Intrinsic::masked_scatter:
5624 NewCall = Builder.CreateMaskedScatter(
5626 DL.getValueOrABITypeAlignment(
5640 case Intrinsic::lifetime_start:
5641 case Intrinsic::lifetime_end: {
5653 NewCall = Builder.CreateLifetimeStart(Ptr);
5655 NewCall = Builder.CreateLifetimeEnd(Ptr);
5664 case Intrinsic::x86_avx512_vpdpbusd_128:
5665 case Intrinsic::x86_avx512_vpdpbusd_256:
5666 case Intrinsic::x86_avx512_vpdpbusd_512:
5667 case Intrinsic::x86_avx512_vpdpbusds_128:
5668 case Intrinsic::x86_avx512_vpdpbusds_256:
5669 case Intrinsic::x86_avx512_vpdpbusds_512:
5670 case Intrinsic::x86_avx2_vpdpbssd_128:
5671 case Intrinsic::x86_avx2_vpdpbssd_256:
5672 case Intrinsic::x86_avx10_vpdpbssd_512:
5673 case Intrinsic::x86_avx2_vpdpbssds_128:
5674 case Intrinsic::x86_avx2_vpdpbssds_256:
5675 case Intrinsic::x86_avx10_vpdpbssds_512:
5676 case Intrinsic::x86_avx2_vpdpbsud_128:
5677 case Intrinsic::x86_avx2_vpdpbsud_256:
5678 case Intrinsic::x86_avx10_vpdpbsud_512:
5679 case Intrinsic::x86_avx2_vpdpbsuds_128:
5680 case Intrinsic::x86_avx2_vpdpbsuds_256:
5681 case Intrinsic::x86_avx10_vpdpbsuds_512:
5682 case Intrinsic::x86_avx2_vpdpbuud_128:
5683 case Intrinsic::x86_avx2_vpdpbuud_256:
5684 case Intrinsic::x86_avx10_vpdpbuud_512:
5685 case Intrinsic::x86_avx2_vpdpbuuds_128:
5686 case Intrinsic::x86_avx2_vpdpbuuds_256:
5687 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5692 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5693 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5695 NewCall = Builder.CreateCall(NewFn, Args);
5698 case Intrinsic::x86_avx512_vpdpwssd_128:
5699 case Intrinsic::x86_avx512_vpdpwssd_256:
5700 case Intrinsic::x86_avx512_vpdpwssd_512:
5701 case Intrinsic::x86_avx512_vpdpwssds_128:
5702 case Intrinsic::x86_avx512_vpdpwssds_256:
5703 case Intrinsic::x86_avx512_vpdpwssds_512:
5704 case Intrinsic::x86_avx2_vpdpwsud_128:
5705 case Intrinsic::x86_avx2_vpdpwsud_256:
5706 case Intrinsic::x86_avx10_vpdpwsud_512:
5707 case Intrinsic::x86_avx2_vpdpwsuds_128:
5708 case Intrinsic::x86_avx2_vpdpwsuds_256:
5709 case Intrinsic::x86_avx10_vpdpwsuds_512:
5710 case Intrinsic::x86_avx2_vpdpwusd_128:
5711 case Intrinsic::x86_avx2_vpdpwusd_256:
5712 case Intrinsic::x86_avx10_vpdpwusd_512:
5713 case Intrinsic::x86_avx2_vpdpwusds_128:
5714 case Intrinsic::x86_avx2_vpdpwusds_256:
5715 case Intrinsic::x86_avx10_vpdpwusds_512:
5716 case Intrinsic::x86_avx2_vpdpwuud_128:
5717 case Intrinsic::x86_avx2_vpdpwuud_256:
5718 case Intrinsic::x86_avx10_vpdpwuud_512:
5719 case Intrinsic::x86_avx2_vpdpwuuds_128:
5720 case Intrinsic::x86_avx2_vpdpwuuds_256:
5721 case Intrinsic::x86_avx10_vpdpwuuds_512:
5726 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5727 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5729 NewCall = Builder.CreateCall(NewFn, Args);
5732 assert(NewCall &&
"Should have either set this variable or returned through "
5733 "the default case");
5740 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5754 F->eraseFromParent();
5760 if (NumOperands == 0)
5768 if (NumOperands == 3) {
5772 Metadata *Elts2[] = {ScalarType, ScalarType,
5786 if (
Opc != Instruction::BitCast)
5790 Type *SrcTy = V->getType();
5807 if (
Opc != Instruction::BitCast)
5810 Type *SrcTy =
C->getType();
5837 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5838 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5839 if (Flag->getNumOperands() < 3)
5841 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5842 return K->getString() ==
"Debug Info Version";
5845 if (OpIt != ModFlags->op_end()) {
5846 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5853 bool BrokenDebugInfo =
false;
5856 if (!BrokenDebugInfo)
5862 M.getContext().diagnose(Diag);
5869 M.getContext().diagnose(DiagVersion);
5879 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5882 if (
F->hasFnAttribute(Attr)) {
5885 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5887 auto [Part, Rest] = S.
split(
',');
5893 const unsigned Dim = DimC -
'x';
5894 assert(Dim < 3 &&
"Unexpected dim char");
5904 F->addFnAttr(Attr, NewAttr);
5908 return S ==
"x" || S ==
"y" || S ==
"z";
5913 if (K ==
"kernel") {
5925 const unsigned Idx = (AlignIdxValuePair >> 16);
5926 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5931 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5936 if (K ==
"minctasm") {
5941 if (K ==
"maxnreg") {
5946 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
5950 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
5954 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
5958 if (K ==
"grid_constant") {
5973 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
5980 if (!SeenNodes.
insert(MD).second)
5987 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
5994 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5996 const MDOperand &V = MD->getOperand(j + 1);
5999 NewOperands.
append({K, V});
6002 if (NewOperands.
size() > 1)
6015 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
6016 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6017 if (ModRetainReleaseMarker) {
6023 ID->getString().split(ValueComp,
"#");
6024 if (ValueComp.
size() == 2) {
6025 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
6029 M.eraseNamedMetadata(ModRetainReleaseMarker);
6040 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
6066 bool InvalidCast =
false;
6068 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
6081 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6083 Args.push_back(Arg);
6090 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6095 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6108 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6116 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6117 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6118 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6119 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6120 {
"objc_autoreleaseReturnValue",
6121 llvm::Intrinsic::objc_autoreleaseReturnValue},
6122 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6123 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6124 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6125 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6126 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6127 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6128 {
"objc_release", llvm::Intrinsic::objc_release},
6129 {
"objc_retain", llvm::Intrinsic::objc_retain},
6130 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6131 {
"objc_retainAutoreleaseReturnValue",
6132 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6133 {
"objc_retainAutoreleasedReturnValue",
6134 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6135 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6136 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6137 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6138 {
"objc_unsafeClaimAutoreleasedReturnValue",
6139 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6140 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6141 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6142 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6143 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6144 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6145 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6146 {
"objc_arc_annotation_topdown_bbstart",
6147 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6148 {
"objc_arc_annotation_topdown_bbend",
6149 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6150 {
"objc_arc_annotation_bottomup_bbstart",
6151 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6152 {
"objc_arc_annotation_bottomup_bbend",
6153 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6155 for (
auto &
I : RuntimeFuncs)
6156 UpgradeToIntrinsic(
I.first,
I.second);
6160 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6164 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6165 bool HasSwiftVersionFlag =
false;
6166 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6173 if (
Op->getNumOperands() != 3)
6187 if (
ID->getString() ==
"Objective-C Image Info Version")
6189 if (
ID->getString() ==
"Objective-C Class Properties")
6190 HasClassProperties =
true;
6192 if (
ID->getString() ==
"PIC Level") {
6193 if (
auto *Behavior =
6195 uint64_t V = Behavior->getLimitedValue();
6201 if (
ID->getString() ==
"PIE Level")
6202 if (
auto *Behavior =
6209 if (
ID->getString() ==
"branch-target-enforcement" ||
6210 ID->getString().starts_with(
"sign-return-address")) {
6211 if (
auto *Behavior =
6217 Op->getOperand(1),
Op->getOperand(2)};
6227 if (
ID->getString() ==
"Objective-C Image Info Section") {
6230 Value->getString().split(ValueComp,
" ");
6231 if (ValueComp.
size() != 1) {
6232 std::string NewValue;
6233 for (
auto &S : ValueComp)
6234 NewValue += S.str();
6245 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6248 assert(Md->getValue() &&
"Expected non-empty metadata");
6249 auto Type = Md->getValue()->getType();
6252 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6253 if ((Val & 0xff) != Val) {
6254 HasSwiftVersionFlag =
true;
6255 SwiftABIVersion = (Val & 0xff00) >> 8;
6256 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6257 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6268 if (
ID->getString() ==
"amdgpu_code_object_version") {
6271 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6283 if (HasObjCFlag && !HasClassProperties) {
6289 if (HasSwiftVersionFlag) {
6293 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6295 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6303 auto TrimSpaces = [](
StringRef Section) -> std::string {
6305 Section.split(Components,
',');
6310 for (
auto Component : Components)
6311 OS <<
',' << Component.trim();
6316 for (
auto &GV : M.globals()) {
6317 if (!GV.hasSection())
6322 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6327 GV.setSection(TrimSpaces(Section));
6343struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6344 StrictFPUpgradeVisitor() =
default;
6347 if (!
Call.isStrictFP())
6353 Call.removeFnAttr(Attribute::StrictFP);
6354 Call.addFnAttr(Attribute::NoBuiltin);
6359struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6360 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6361 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6363 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6378 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6379 StrictFPUpgradeVisitor SFPV;
6384 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6385 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6386 for (
auto &Arg :
F.args())
6388 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6390 bool AddingAttrs =
false, RemovingAttrs =
false;
6391 AttrBuilder AttrsToAdd(
F.getContext());
6396 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6397 A.isValid() &&
A.isStringAttribute()) {
6398 F.setSection(
A.getValueAsString());
6400 RemovingAttrs =
true;
6404 A.isValid() &&
A.isStringAttribute()) {
6407 AddingAttrs = RemovingAttrs =
true;
6410 if (
Attribute A =
F.getFnAttribute(
"uniform-work-group-size");
6411 A.isValid() &&
A.isStringAttribute() && !
A.getValueAsString().empty()) {
6413 RemovingAttrs =
true;
6414 if (
A.getValueAsString() ==
"true") {
6415 AttrsToAdd.addAttribute(
"uniform-work-group-size");
6424 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6427 if (
A.getValueAsBool()) {
6428 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6434 AttrsToRemove.
addAttribute(
"amdgpu-unsafe-fp-atomics");
6435 RemovingAttrs =
true;
6442 bool HandleDenormalMode =
false;
6444 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math"); Attr.isValid()) {
6447 DenormalFPMath = ParsedMode;
6449 AddingAttrs = RemovingAttrs =
true;
6450 HandleDenormalMode =
true;
6454 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math-f32");
6458 DenormalFPMathF32 = ParsedMode;
6460 AddingAttrs = RemovingAttrs =
true;
6461 HandleDenormalMode =
true;
6465 if (HandleDenormalMode)
6466 AttrsToAdd.addDenormalFPEnvAttr(
6470 F.removeFnAttrs(AttrsToRemove);
6473 F.addFnAttrs(AttrsToAdd);
6479 if (!
F.hasFnAttribute(FnAttrName))
6480 F.addFnAttr(FnAttrName,
Value);
6487 if (!
F.hasFnAttribute(FnAttrName)) {
6489 F.addFnAttr(FnAttrName);
6491 auto A =
F.getFnAttribute(FnAttrName);
6492 if (
"false" ==
A.getValueAsString())
6493 F.removeFnAttr(FnAttrName);
6494 else if (
"true" ==
A.getValueAsString()) {
6495 F.removeFnAttr(FnAttrName);
6496 F.addFnAttr(FnAttrName);
6502 Triple T(M.getTargetTriple());
6503 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6513 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6517 if (
Op->getNumOperands() != 3)
6526 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6527 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6528 : IDStr ==
"guarded-control-stack" ? &GCSValue
6529 : IDStr ==
"sign-return-address" ? &SRAValue
6530 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6531 : IDStr ==
"sign-return-address-with-bkey"
6537 *ValPtr = CI->getZExtValue();
6543 bool BTE = BTEValue == 1;
6544 bool BPPLR = BPPLRValue == 1;
6545 bool GCS = GCSValue == 1;
6546 bool SRA = SRAValue == 1;
6549 if (SRA && SRAALLValue == 1)
6550 SignTypeValue =
"all";
6553 if (SRA && SRABKeyValue == 1)
6554 SignKeyValue =
"b_key";
6556 for (
Function &
F : M.getFunctionList()) {
6557 if (
F.isDeclaration())
6564 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6565 A.isValid() &&
"none" ==
A.getValueAsString()) {
6566 F.removeFnAttr(
"sign-return-address");
6567 F.removeFnAttr(
"sign-return-address-key");
6583 if (SRAALLValue == 1)
6585 if (SRABKeyValue == 1)
6594 if (
T->getNumOperands() < 1)
6599 return S->getString().starts_with(
"llvm.vectorizer.");
6603 StringRef OldPrefix =
"llvm.vectorizer.";
6606 if (OldTag ==
"llvm.vectorizer.unroll")
6618 if (
T->getNumOperands() < 1)
6623 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6628 Ops.reserve(
T->getNumOperands());
6630 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6631 Ops.push_back(
T->getOperand(
I));
6645 Ops.reserve(
T->getNumOperands());
6656 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6657 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6658 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6661 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6663 auto I =
DL.find(
"-n64-");
6665 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6670 std::string Res =
DL.str();
6673 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6674 Res.append(Res.empty() ?
"G1" :
"-G1");
6682 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6683 Res.append(
"-ni:7:8:9");
6685 if (
DL.ends_with(
"ni:7"))
6687 if (
DL.ends_with(
"ni:7:8"))
6692 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6693 Res.append(
"-p7:160:256:256:32");
6694 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6695 Res.append(
"-p8:128:128:128:48");
6696 constexpr StringRef OldP8(
"-p8:128:128-");
6697 if (
DL.contains(OldP8))
6698 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6699 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6700 Res.append(
"-p9:192:256:256:32");
6704 if (!
DL.contains(
"m:e"))
6705 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6710 if (
T.isSystemZ() && !
DL.empty()) {
6712 if (!
DL.contains(
"-S64"))
6713 return "E-S64" +
DL.drop_front(1).str();
6717 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6720 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6721 if (!
DL.contains(AddrSpaces)) {
6723 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6724 if (R.match(Res, &
Groups))
6730 if (
T.isAArch64()) {
6732 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6733 Res.append(
"-Fn32");
6734 AddPtr32Ptr64AddrSpaces();
6738 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6742 std::string I64 =
"-i64:64";
6743 std::string I128 =
"-i128:128";
6745 size_t Pos = Res.find(I64);
6746 if (Pos !=
size_t(-1))
6747 Res.insert(Pos + I64.size(), I128);
6751 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6752 size_t Pos = Res.find(
"-S128");
6755 Res.insert(Pos,
"-f64:32:64");
6761 AddPtr32Ptr64AddrSpaces();
6769 if (!
T.isOSIAMCU()) {
6770 std::string I128 =
"-i128:128";
6773 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6774 if (R.match(Res, &
Groups))
6782 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6784 auto I =
Ref.find(
"-f80:32-");
6786 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6794 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6797 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6798 B.removeAttribute(
"no-frame-pointer-elim");
6800 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6802 if (FramePointer !=
"all")
6803 FramePointer =
"non-leaf";
6804 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6806 if (!FramePointer.
empty())
6807 B.addAttribute(
"frame-pointer", FramePointer);
6809 A =
B.getAttribute(
"null-pointer-is-valid");
6812 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6813 B.removeAttribute(
"null-pointer-is-valid");
6814 if (NullPointerIsValid)
6815 B.addAttribute(Attribute::NullPointerIsValid);
6818 A =
B.getAttribute(
"uniform-work-group-size");
6822 bool IsTrue = Val ==
"true";
6823 B.removeAttribute(
"uniform-work-group-size");
6825 B.addAttribute(
"uniform-work-group-size");
6836 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
@ ADDRESS_SPACE_SHARED_CLUSTER
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
@ Default
The result values are uniform if and only if all operands are uniform.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.