LLVM 23.0.0git
AArch64LegalizerInfo.cpp
Go to the documentation of this file.
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
43 : ST(&ST) {
44 using namespace TargetOpcode;
45 const LLT p0 = LLT::pointer(0, 64);
46 const LLT s8 = LLT::scalar(8);
47 const LLT s16 = LLT::scalar(16);
48 const LLT s32 = LLT::scalar(32);
49 const LLT s64 = LLT::scalar(64);
50 const LLT s128 = LLT::scalar(128);
51 const LLT v16s8 = LLT::fixed_vector(16, 8);
52 const LLT v8s8 = LLT::fixed_vector(8, 8);
53 const LLT v4s8 = LLT::fixed_vector(4, 8);
54 const LLT v2s8 = LLT::fixed_vector(2, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
64 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
65 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
66 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
67
68 const LLT bf16 = LLT::bfloat16();
69 const LLT v4bf16 = LLT::fixed_vector(4, bf16);
70 const LLT v8bf16 = LLT::fixed_vector(8, bf16);
71
72 const LLT f16 = LLT::float16();
73 const LLT v4f16 = LLT::fixed_vector(4, f16);
74 const LLT v8f16 = LLT::fixed_vector(8, f16);
75
76 const LLT f32 = LLT::float32();
77 const LLT v2f32 = LLT::fixed_vector(2, f32);
78 const LLT v4f32 = LLT::fixed_vector(4, f32);
79
80 const LLT f64 = LLT::float64();
81 const LLT v2f64 = LLT::fixed_vector(2, f64);
82
83 const LLT f128 = LLT::float128();
84
85 const LLT i8 = LLT::integer(8);
86 const LLT v8i8 = LLT::fixed_vector(8, i8);
87 const LLT v16i8 = LLT::fixed_vector(16, i8);
88
89 const LLT i16 = LLT::integer(16);
90 const LLT v8i16 = LLT::fixed_vector(8, i16);
91 const LLT v4i16 = LLT::fixed_vector(4, i16);
92
93 const LLT i32 = LLT::integer(32);
94 const LLT v2i32 = LLT::fixed_vector(2, i32);
95 const LLT v4i32 = LLT::fixed_vector(4, i32);
96
97 const LLT i64 = LLT::integer(64);
98 const LLT v2i64 = LLT::fixed_vector(2, i64);
99
100 const LLT i128 = LLT::integer(128);
101
102 const LLT nxv16i8 = LLT::scalable_vector(16, i8);
103 const LLT nxv8i16 = LLT::scalable_vector(8, i16);
104 const LLT nxv4i32 = LLT::scalable_vector(4, i32);
105 const LLT nxv2i64 = LLT::scalable_vector(2, i64);
106
107 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
108 v16s8, v8s16, v4s32,
109 v2s64, v2p0,
110 /* End 128bit types */
111 /* Begin 64bit types */
112 v8s8, v4s16, v2s32};
113 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
114 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
115 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
116
117 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
118
119 // FIXME: support subtargets which have neon/fp-armv8 disabled.
120 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
122 return;
123 }
124
125 // Some instructions only support s16 if the subtarget has full 16-bit FP
126 // support.
127 const bool HasFP16 = ST.hasFullFP16();
128 const LLT &MinFPScalar = HasFP16 ? f16 : f32;
129
130 const bool HasCSSC = ST.hasCSSC();
131 const bool HasRCPC3 = ST.hasRCPC3();
132 const bool HasSVE = ST.hasSVE();
133
135 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
136 .legalFor({p0, s8, s16, s32, s64, s128})
137 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
138 v2s64, v2p0})
139 .widenScalarToNextPow2(0)
140 .clampScalar(0, s8, s64)
143 .clampNumElements(0, v8s8, v16s8)
144 .clampNumElements(0, v4s16, v8s16)
145 .clampNumElements(0, v2s32, v4s32)
146 .clampMaxNumElements(0, s64, 2)
147 .clampMaxNumElements(0, p0, 2)
149
151 .legalFor({p0, s16, s32, s64})
152 .legalFor(PackedVectorAllTypeList)
156 .clampScalar(0, s16, s64)
157 .clampNumElements(0, v8s8, v16s8)
158 .clampNumElements(0, v4s16, v8s16)
159 .clampNumElements(0, v2s32, v4s32)
160 .clampMaxNumElements(0, s64, 2)
161 .clampMaxNumElements(0, p0, 2);
162
164 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
165 smallerThan(1, 0)))
166 .widenScalarToNextPow2(0)
167 .clampScalar(0, s32, s64)
169 .minScalar(1, s8)
170 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
171 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
172
174 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
175 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
176 .widenScalarToNextPow2(1)
177 .clampScalar(1, s32, s128)
179 .minScalar(0, s16)
180 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
181 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
182 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
183
184 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
185 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
186 .legalFor(HasSVE, {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
187 .widenScalarToNextPow2(0)
188 .clampScalar(0, s32, s64)
189 .clampMaxNumElements(0, s8, 16)
190 .clampMaxNumElements(0, s16, 8)
191 .clampNumElements(0, v2s32, v4s32)
192 .clampNumElements(0, v2s64, v2s64)
194 [=](const LegalityQuery &Query) {
195 return Query.Types[0].getNumElements() <= 2;
196 },
197 0, s32)
198 .minScalarOrEltIf(
199 [=](const LegalityQuery &Query) {
200 return Query.Types[0].getNumElements() <= 4;
201 },
202 0, s16)
203 .minScalarOrEltIf(
204 [=](const LegalityQuery &Query) {
205 return Query.Types[0].getNumElements() <= 16;
206 },
207 0, s8)
208 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
210
212 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
213 .widenScalarToNextPow2(0)
214 .clampScalar(0, s32, s64)
215 .clampMaxNumElements(0, s8, 16)
216 .clampMaxNumElements(0, s16, 8)
217 .clampNumElements(0, v2s32, v4s32)
218 .clampNumElements(0, v2s64, v2s64)
220 [=](const LegalityQuery &Query) {
221 return Query.Types[0].getNumElements() <= 2;
222 },
223 0, s32)
224 .minScalarOrEltIf(
225 [=](const LegalityQuery &Query) {
226 return Query.Types[0].getNumElements() <= 4;
227 },
228 0, s16)
229 .minScalarOrEltIf(
230 [=](const LegalityQuery &Query) {
231 return Query.Types[0].getNumElements() <= 16;
232 },
233 0, s8)
234 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
236
237 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
238 .customIf([=](const LegalityQuery &Query) {
239 const auto &SrcTy = Query.Types[0];
240 const auto &AmtTy = Query.Types[1];
241 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
242 AmtTy.getSizeInBits() == 32;
243 })
244 .legalFor({
245 {i32, i32},
246 {i32, i64},
247 {i64, i64},
248 {v8i8, v8i8},
249 {v16i8, v16i8},
250 {v4i16, v4i16},
251 {v8i16, v8i16},
252 {v2i32, v2i32},
253 {v4i32, v4i32},
254 {v2i64, v2i64},
255 })
256 .widenScalarToNextPow2(0)
257 .clampScalar(1, s32, s64)
258 .clampScalar(0, s32, s64)
259 .clampNumElements(0, v8s8, v16s8)
260 .clampNumElements(0, v4s16, v8s16)
261 .clampNumElements(0, v2s32, v4s32)
262 .clampNumElements(0, v2s64, v2s64)
264 .minScalarSameAs(1, 0)
268
270 .legalFor({{p0, i64}, {v2p0, v2i64}})
271 .clampScalarOrElt(1, s64, s64)
272 .clampNumElements(0, v2p0, v2p0);
273
274 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
275
276 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
277 .legalFor({i32, i64})
278 .libcallFor({i128})
279 .clampScalar(0, s32, s64)
281 .scalarize(0);
282
283 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
284 .lowerFor({i8, i16, i32, i64, v2i32, v4i32, v2i64})
285 .libcallFor({i128})
287 .minScalarOrElt(0, s32)
288 .clampNumElements(0, v2s32, v4s32)
289 .clampNumElements(0, v2s64, v2s64)
290 .scalarize(0);
291
292 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
293 .widenScalarToNextPow2(0, /*Min = */ 32)
294 .clampScalar(0, s32, s64)
295 .lower();
296
297 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
298 .legalFor({i64, v16i8, v8i16, v4i32})
299 .lower();
300
301 getActionDefinitionsBuilder({G_SMULFIX, G_UMULFIX}).lower();
302
303 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
304 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
305 .legalFor(HasCSSC, {i32, i64})
306 .minScalar(HasCSSC, 0, s32)
307 .clampNumElements(0, v8s8, v16s8)
308 .clampNumElements(0, v4s16, v8s16)
309 .clampNumElements(0, v2s32, v4s32)
310 .lower();
311
312 // FIXME: Legal vector types are only legal with NEON.
314 .legalFor(HasCSSC, {i32, i64})
315 .legalFor({v16i8, v8i16, v4i32, v2i64, v2p0, v8i8, v4i16, v2i32})
316 .customIf([=](const LegalityQuery &Q) {
317 // TODO: Fix suboptimal codegen for 128+ bit types.
318 LLT SrcTy = Q.Types[0];
319 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
320 })
321 .widenScalarIf(
322 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
323 [=](const LegalityQuery &Query) { return std::make_pair(0, v4i16); })
324 .widenScalarIf(
325 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
326 [=](const LegalityQuery &Query) { return std::make_pair(0, v2i32); })
327 .clampNumElements(0, v8s8, v16s8)
328 .clampNumElements(0, v4s16, v8s16)
329 .clampNumElements(0, v2s32, v4s32)
330 .clampNumElements(0, v2s64, v2s64)
332 .lower();
333
335 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
336 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
337 .lower();
338
340 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
341 .legalFor({{i32, i32}, {i64, i32}})
342 .clampScalar(0, s32, s64)
343 .clampScalar(1, s32, s64)
345
346 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
347 .customFor({{i32, i32}, {i32, i64}, {i64, i64}})
348 .lower();
349
351 .legalFor({{i32, i64}, {i64, i64}})
352 .customIf([=](const LegalityQuery &Q) {
353 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
354 })
355 .lower();
357
358 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
359 .customFor({{s32, s32}, {s64, s64}});
360
// Predicate that accepts every legality query. It is passed to the *If rule
// variants below (minScalarEltSameAsIf / maxScalarEltSameAsIf) so that those
// rules apply unconditionally.
361 auto always = [=](const LegalityQuery &Q) { return true; };
363 .legalFor(HasCSSC, {{i32, i32}, {i64, i64}})
364 .legalFor({{v8i8, v8i8}, {v16i8, v16i8}})
365 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
366 .customFor({{s128, s128},
367 {v4s16, v4s16},
368 {v8s16, v8s16},
369 {v2s32, v2s32},
370 {v4s32, v4s32},
371 {v2s64, v2s64}})
372 .clampScalar(0, s32, s128)
375 .minScalarEltSameAsIf(always, 1, 0)
376 .maxScalarEltSameAsIf(always, 1, 0)
377 .clampNumElements(0, v8s8, v16s8)
378 .clampNumElements(0, v4s16, v8s16)
379 .clampNumElements(0, v2s32, v4s32)
380 .clampNumElements(0, v2s64, v2s64)
383
384 getActionDefinitionsBuilder({G_CTLZ, G_CTLS})
385 .legalFor({{i32, i32},
386 {i64, i64},
387 {v8i8, v8i8},
388 {v16i8, v16i8},
389 {v4i16, v4i16},
390 {v8i16, v8i16},
391 {v2i32, v2i32},
392 {v4i32, v4i32}})
393 .widenScalarToNextPow2(1, /*Min=*/32)
394 .clampScalar(1, s32, s64)
396 .clampNumElements(0, v8s8, v16s8)
397 .clampNumElements(0, v4s16, v8s16)
398 .clampNumElements(0, v2s32, v4s32)
401 .scalarSameSizeAs(0, 1);
402
403 getActionDefinitionsBuilder(G_INSERT_SUBVECTOR).lower();
404
405 getActionDefinitionsBuilder(G_CTLZ_ZERO_POISON).lower();
406
408 .lowerIf(isVector(0))
409 .widenScalarToNextPow2(1, /*Min=*/32)
410 .clampScalar(1, s32, s64)
411 .scalarSameSizeAs(0, 1)
412 .legalFor(HasCSSC, {s32, s64})
413 .customFor(!HasCSSC, {s32, s64});
414
415 getActionDefinitionsBuilder(G_CTTZ_ZERO_POISON).lower();
416
417 getActionDefinitionsBuilder(G_BITREVERSE)
418 .legalFor({i32, i64, v8i8, v16i8})
419 .widenScalarToNextPow2(0, /*Min = */ 32)
421 .clampScalar(0, s32, s64)
422 .clampNumElements(0, v8s8, v16s8)
423 .clampNumElements(0, v4s16, v8s16)
424 .clampNumElements(0, v2s32, v4s32)
425 .clampNumElements(0, v2s64, v2s64)
428 .lower();
429
430 getActionDefinitionsBuilder(G_CLMUL).legalFor({v8i8, v16i8});
431
433 .legalFor({i32, i64, v4i16, v8i16, v2i32, v4i32, v2i64})
435 .clampScalar(0, s32, s64)
436 .clampNumElements(0, v4s16, v8s16)
437 .clampNumElements(0, v2s32, v4s32)
438 .clampNumElements(0, v2s64, v2s64)
440
441 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
442 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
443 .legalFor(HasSVE, {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
444 .clampNumElements(0, v8s8, v16s8)
445 .clampNumElements(0, v4s16, v8s16)
446 .clampNumElements(0, v2s32, v4s32)
447 .clampMaxNumElements(0, s64, 2)
450 .lower();
451
453 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
454 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
455 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
456 .legalFor({f32, f64, v2f32, v4f32, v2f64})
457 .legalFor(HasFP16, {f16, v4f16, v8f16})
458 .libcallFor({f128})
459 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
461 [=](const LegalityQuery &Q) {
462 return (!HasFP16 && Q.Types[0].getScalarType().isFloat16()) ||
463 Q.Types[0].getScalarType().isBFloat16();
464 },
465 changeElementTo(0, f32))
466 .clampNumElements(0, v4s16, v8s16)
467 .clampNumElements(0, v2s32, v4s32)
468 .clampNumElements(0, v2s64, v2s64)
470
471 getActionDefinitionsBuilder({G_FABS, G_FNEG})
472 .legalFor({f32, f64, v2f32, v4f32, v2f64})
473 .legalFor(HasFP16, {f16, bf16, v4f16, v4bf16, v8f16, v8bf16})
474 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
476 .clampNumElements(0, v4s16, v8s16)
477 .clampNumElements(0, v2s32, v4s32)
478 .clampNumElements(0, v2s64, v2s64)
480 .lowerFor({f16, bf16, v4f16, v4bf16, v8f16, v8bf16});
481
482 getActionDefinitionsBuilder({G_FREM, G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
483 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
484 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
485 G_FSINH, G_FTANH, G_FMODF})
486 .libcallFor({f32, f64, f128})
487 .widenScalarFor({f16, bf16}, changeElementTo(0, f32))
488 .scalarize(0);
489 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
490 .libcallFor({{f32, i32}, {f64, i32}, {f128, i32}})
491 .widenScalarFor({f16, bf16}, changeElementTo(0, f32))
492 .scalarize(0);
493
494 getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})
495 .legalFor({{i32, f32}, {i32, f64}, {i64, f32}, {i64, f64}})
496 .legalFor(HasFP16, {{i32, f16}, {i64, f16}})
497 .minScalar(1, s32)
498 .libcallFor({{s64, s128}})
499 .lower();
500 getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
501 .legalFor({{i64, f32}, {i64, f64}})
502 .legalFor(HasFP16, {{i64, f16}})
503 .minScalar(0, s64)
504 .minScalar(1, s32)
505 .libcallFor({{s64, s128}})
506 .lower();
507
508 // TODO: Custom legalization for mismatched types.
509 getActionDefinitionsBuilder(G_FCOPYSIGN)
511 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
512 [=](const LegalityQuery &Query) {
513 const LLT Ty = Query.Types[0];
514 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
515 })
516 .lower();
517
519
520 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
521 auto &Actions = getActionDefinitionsBuilder(Op);
522
523 if (Op == G_SEXTLOAD)
525
526 // Atomics have zero extending behavior.
527 Actions
528 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
529 {s32, p0, s16, 8},
530 {s32, p0, s32, 8},
531 {s64, p0, s8, 2},
532 {s64, p0, s16, 2},
533 {s64, p0, s32, 4},
534 {s64, p0, s64, 8},
535 {p0, p0, s64, 8},
536 {v2s32, p0, s64, 8}})
537 .widenScalarToNextPow2(0)
538 .clampScalar(0, s32, s64)
539 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
540 // how to do that yet.
541 .unsupportedIfMemSizeNotPow2()
542 // Lower anything left over into G_*EXT and G_LOAD
543 .lower();
544 }
545
// Predicate: true when the value type (type index 0) is a vector of pointers
// whose address space is 0. It is handed to customIf() in the rule chains
// that follow.
546 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
547 const LLT &ValTy = Query.Types[0];
548 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
549 };
550
552 .customIf([=](const LegalityQuery &Query) {
553 return HasRCPC3 && Query.Types[0] == s128 &&
554 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
555 })
556 .customIf([=](const LegalityQuery &Query) {
557 return Query.Types[0] == s128 &&
558 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
559 })
560 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
561 {s16, p0, s16, 8},
562 {s32, p0, s32, 8},
563 {s64, p0, s64, 8},
564 {p0, p0, s64, 8},
565 {s128, p0, s128, 8},
566 {v8s8, p0, s64, 8},
567 {v16s8, p0, s128, 8},
568 {v4s16, p0, s64, 8},
569 {v8s16, p0, s128, 8},
570 {v2s32, p0, s64, 8},
571 {v4s32, p0, s128, 8},
572 {v2s64, p0, s128, 8}})
573 // These extends are also legal
574 .legalForTypesWithMemDesc(
575 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
576 .legalForTypesWithMemDesc({
577 // SVE vscale x 128 bit base sizes
578 {nxv16s8, p0, nxv16s8, 8},
579 {nxv8s16, p0, nxv8s16, 8},
580 {nxv4s32, p0, nxv4s32, 8},
581 {nxv2s64, p0, nxv2s64, 8},
582 })
583 .widenScalarToNextPow2(0, /* MinSize = */ 8)
584 .clampMaxNumElements(0, s8, 16)
585 .clampMaxNumElements(0, s16, 8)
586 .clampMaxNumElements(0, s32, 4)
587 .clampMaxNumElements(0, s64, 2)
588 .clampMaxNumElements(0, p0, 2)
590 .clampScalar(0, s8, s64)
592 [=](const LegalityQuery &Query) {
593 // Clamp extending load results to 32-bits.
594 return Query.Types[0].isScalar() &&
595 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
596 Query.Types[0].getSizeInBits() > 32;
597 },
598 changeTo(0, s32))
599 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
600 .bitcastIf(typeInSet(0, {v4s8}),
601 [=](const LegalityQuery &Query) {
602 const LLT VecTy = Query.Types[0];
603 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
604 })
605 .customIf(IsPtrVecPred)
606 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
607 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
608
610 .customIf([=](const LegalityQuery &Query) {
611 return HasRCPC3 && Query.Types[0] == s128 &&
612 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
613 })
614 .customIf([=](const LegalityQuery &Query) {
615 return Query.Types[0] == s128 &&
616 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
617 })
618 .widenScalarIf(
619 all(scalarNarrowerThan(0, 32),
621 changeTo(0, s32))
623 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
624 {s32, p0, s8, 8}, // truncstorei8 from s32
625 {s64, p0, s8, 8}, // truncstorei8 from s64
626 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
627 {s64, p0, s16, 8}, // truncstorei16 from s64
628 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
629 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
630 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
631 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
632 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
633 .legalForTypesWithMemDesc({
634 // SVE vscale x 128 bit base sizes
635 // TODO: Add nxv2p0. Consider bitcastIf.
636 // See #92130
637 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
638 {nxv16s8, p0, nxv16s8, 8},
639 {nxv8s16, p0, nxv8s16, 8},
640 {nxv4s32, p0, nxv4s32, 8},
641 {nxv2s64, p0, nxv2s64, 8},
642 })
643 .clampScalar(0, s8, s64)
644 .minScalarOrElt(0, s8)
645 .lowerIf([=](const LegalityQuery &Query) {
646 return Query.Types[0].isScalar() &&
647 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
648 })
649 // Maximum: sN * k = 128
650 .clampMaxNumElements(0, s8, 16)
651 .clampMaxNumElements(0, s16, 8)
652 .clampMaxNumElements(0, s32, 4)
653 .clampMaxNumElements(0, s64, 2)
654 .clampMaxNumElements(0, p0, 2)
656 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
657 .bitcastIf(all(typeInSet(0, {v4s8}),
658 LegalityPredicate([=](const LegalityQuery &Query) {
659 return Query.Types[0].getSizeInBits() ==
660 Query.MMODescrs[0].MemoryTy.getSizeInBits();
661 })),
662 [=](const LegalityQuery &Query) {
663 const LLT VecTy = Query.Types[0];
664 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
665 })
666 .customIf(IsPtrVecPred)
667 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
668 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
669 .lower();
670
671 getActionDefinitionsBuilder(G_INDEXED_STORE)
672 // Idx 0 == Ptr, Idx 1 == Val
673 // TODO: we can implement legalizations but as of now these are
674 // generated in a very specific way.
676 {p0, s8, s8, 8},
677 {p0, s16, s16, 8},
678 {p0, s32, s8, 8},
679 {p0, s32, s16, 8},
680 {p0, s32, s32, 8},
681 {p0, s64, s64, 8},
682 {p0, p0, p0, 8},
683 {p0, v8s8, v8s8, 8},
684 {p0, v16s8, v16s8, 8},
685 {p0, v4s16, v4s16, 8},
686 {p0, v8s16, v8s16, 8},
687 {p0, v2s32, v2s32, 8},
688 {p0, v4s32, v4s32, 8},
689 {p0, v2s64, v2s64, 8},
690 {p0, v2p0, v2p0, 8},
691 {p0, s128, s128, 8},
692 })
693 .unsupported();
694
// Legality predicate used for G_INDEXED_LOAD (via legalIf below).
// Returns true only when:
//   - the loaded type (type index 0) is one of the packed vector types, one
//     of the scalar/pointer types, or s128; and
//   - the pointer type (type index 1) is p0.
695 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
696 LLT LdTy = Query.Types[0];
697 LLT PtrTy = Query.Types[1];
// Reject any loaded type that is in none of the three accepted sets.
698 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
699 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
700 return false;
// Only p0 pointers are accepted.
701 if (PtrTy != p0)
702 return false;
703 return true;
704 };
705 getActionDefinitionsBuilder(G_INDEXED_LOAD)
708 .legalIf(IndexedLoadBasicPred)
709 .unsupported();
710 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
711 .unsupportedIf(
713 .legalIf(all(typeInSet(0, {s16, s32, s64}),
714 LegalityPredicate([=](const LegalityQuery &Q) {
715 LLT LdTy = Q.Types[0];
716 LLT PtrTy = Q.Types[1];
717 LLT MemTy = Q.MMODescrs[0].MemoryTy;
718 if (PtrTy != p0)
719 return false;
720 if (LdTy == s16)
721 return MemTy == s8;
722 if (LdTy == s32)
723 return MemTy == s8 || MemTy == s16;
724 if (LdTy == s64)
725 return MemTy == s8 || MemTy == s16 || MemTy == s32;
726 return false;
727 })))
728 .unsupported();
729
730 // Constants
732 .legalFor({p0, s8, s16, s32, s64})
733 .widenScalarToNextPow2(0)
734 .clampScalar(0, s8, s64);
735 getActionDefinitionsBuilder(G_FCONSTANT)
736 .legalFor({s16, s32, s64, s128});
737
738 // FIXME: fix moreElementsToNextPow2
740 .legalFor({{i32, i32}, {i32, i64}, {i32, p0}})
742 .minScalarOrElt(1, s8)
743 .clampScalar(1, s32, s64)
744 .clampScalar(0, s32, s32)
747 [=](const LegalityQuery &Query) {
748 const LLT &Ty = Query.Types[0];
749 const LLT &SrcTy = Query.Types[1];
750 return Ty.isVector() && !SrcTy.isPointerVector() &&
751 Ty.getElementType() != SrcTy.getElementType();
752 },
753 0, 1)
754 .minScalarOrEltIf(
755 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
756 1, s32)
757 .minScalarOrEltIf(
758 [=](const LegalityQuery &Query) {
759 return Query.Types[1].isPointerVector();
760 },
761 0, s64)
763 .clampNumElements(1, v8s8, v16s8)
764 .clampNumElements(1, v4s16, v8s16)
765 .clampNumElements(1, v2s32, v4s32)
766 .clampNumElements(1, v2s64, v2s64)
767 .clampNumElements(1, v2p0, v2p0)
768 .customIf(isVector(0));
769
771 .legalFor({{i32, f32},
772 {i32, f64},
773 {v4i32, v4f32},
774 {v2i32, v2f32},
775 {v2i64, v2f64}})
776 .legalFor(HasFP16, {{i32, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
778 .clampScalar(0, s32, s32)
780 [=](const LegalityQuery &Q) {
781 return (!HasFP16 && Q.Types[1].getScalarType().isFloat16()) ||
782 Q.Types[1].getScalarType().isBFloat16();
783 },
784 changeElementTo(1, f32))
785 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
787 [=](const LegalityQuery &Query) {
788 const LLT &Ty = Query.Types[0];
789 const LLT &SrcTy = Query.Types[1];
790 return Ty.isVector() && !SrcTy.isPointerVector() &&
791 Ty.getElementType() != SrcTy.getElementType();
792 },
793 0, 1)
794 .clampNumElements(1, v4s16, v8s16)
795 .clampNumElements(1, v2s32, v4s32)
796 .clampMaxNumElements(1, s64, 2)
798 .libcallFor({{s32, s128}});
799
800 // Extensions
// Legality predicate for the scalar forms of G_ZEXT / G_SEXT / G_ANYEXT (used
// with legalIf below). Returns true when the destination (type index 0) is a
// non-vector type whose size is a power of two in the range [8, 128) bits and
// the source (type index 1) has a power-of-two size of at least 8 bits.
801 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
802 unsigned DstSize = Query.Types[0].getSizeInBits();
803
804 // Vectors are never accepted here; the legal vector cases are listed
// separately with legalFor.
805 if (Query.Types[0].isVector())
806 return false;
807
808 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
809 return false; // Extending to a scalar s128 needs narrowing.
810
811 const LLT &SrcTy = Query.Types[1];
812
813 // Make sure the source fits in a register otherwise. Don't bother checking
814 // that the source type is below 128 bits: nothing wider than the
815 // destination should be allowed through in the first place.
816 unsigned SrcSize = SrcTy.getSizeInBits();
817 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
818 return false;
819
820 return true;
821 };
822 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
823 .legalIf(ExtLegalFunc)
824 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
825 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
827 .clampMaxNumElements(1, s8, 8)
828 .clampMaxNumElements(1, s16, 4)
829 .clampMaxNumElements(1, s32, 2)
830 // Tries to convert a large EXTEND into two smaller EXTENDs
831 .lowerIf([=](const LegalityQuery &Query) {
832 return (Query.Types[0].getScalarSizeInBits() >
833 Query.Types[1].getScalarSizeInBits() * 2) &&
834 Query.Types[0].isVector() &&
835 (Query.Types[1].getScalarSizeInBits() == 8 ||
836 Query.Types[1].getScalarSizeInBits() == 16);
837 })
838 .clampMinNumElements(1, s8, 8)
839 .clampMinNumElements(1, s16, 4)
841
843 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
845 .clampMaxNumElements(0, s8, 8)
846 .clampMaxNumElements(0, s16, 4)
847 .clampMaxNumElements(0, s32, 2)
849 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
850 0, s8)
851 .lowerIf([=](const LegalityQuery &Query) {
852 LLT DstTy = Query.Types[0];
853 LLT SrcTy = Query.Types[1];
854 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
855 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
856 })
857 .clampMinNumElements(0, s8, 8)
858 .clampMinNumElements(0, s16, 4)
859 .alwaysLegal();
860
861 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
862 .legalFor({{v8i8, v8i16}, {v4i16, v4i32}, {v2i32, v2i64}})
863 .clampNumElements(0, v2s32, v2s32);
864
865 getActionDefinitionsBuilder(G_SEXT_INREG)
866 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
867 .maxScalar(0, s64)
868 .clampNumElements(0, v8s8, v16s8)
869 .clampNumElements(0, v4s16, v8s16)
870 .clampNumElements(0, v2s32, v4s32)
871 .clampMaxNumElements(0, s64, 2)
872 .lower();
873
874 // FP conversions
876 .legalFor(
877 {{f16, f32}, {f16, f64}, {f32, f64}, {v4f16, v4f32}, {v2f32, v2f64}})
878 .legalFor(ST.hasBF16(), {{bf16, f32}, {v4bf16, v4f32}})
879 .libcallFor({{f16, f128}, {f32, f128}, {f64, f128}})
881 .customIf([](const LegalityQuery &Q) {
882 LLT DstTy = Q.Types[0];
883 LLT SrcTy = Q.Types[1];
884 return SrcTy.getScalarSizeInBits() == 64 &&
885 DstTy.getScalarSizeInBits() == 16;
886 })
887 .lowerFor({{bf16, f32}, {v4bf16, v4f32}})
888 // Clamp based on input
889 .clampNumElements(1, v4s32, v4s32)
890 .clampNumElements(1, v2s64, v2s64)
891 .scalarize(0);
892
893 getActionDefinitionsBuilder(G_FPEXT)
894 .legalFor({{f32, f16},
895 {f64, f16},
896 {f32, bf16},
897 {f64, f32},
898 {v4f32, v4f16},
899 {v4f32, v4bf16},
900 {v2f64, v2f32}})
901 .libcallFor({{f128, f64}, {f128, f32}, {f128, f16}})
904 [](const LegalityQuery &Q) {
905 LLT DstTy = Q.Types[0];
906 LLT SrcTy = Q.Types[1];
907 return SrcTy.isVector() && DstTy.isVector() &&
908 SrcTy.getScalarSizeInBits() == 16 &&
909 DstTy.getScalarSizeInBits() == 64;
910 },
911 changeElementTo(1, f32))
912 .clampNumElements(0, v4s32, v4s32)
913 .clampNumElements(0, v2s64, v2s64)
914 .scalarize(0);
915
916 // Conversions
917 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
918 .legalFor({{i32, f32},
919 {i64, f32},
920 {i32, f64},
921 {i64, f64},
922 {v2i32, v2f32},
923 {v4i32, v4f32},
924 {v2i64, v2f64}})
925 .legalFor(HasFP16,
926 {{i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
927 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
929 // The range of a fp16 value fits into an i17, so we can lower the width
930 // to i64.
932 [=](const LegalityQuery &Query) {
933 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
934 },
935 changeTo(0, i64))
938 .minScalar(0, s32)
939 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
941 [=](const LegalityQuery &Query) {
942 return Query.Types[0].getScalarSizeInBits() <= 64 &&
943 Query.Types[0].getScalarSizeInBits() >
944 Query.Types[1].getScalarSizeInBits();
945 },
947 .widenScalarIf(
948 [=](const LegalityQuery &Query) {
949 return Query.Types[1].getScalarSizeInBits() <= 64 &&
950 Query.Types[0].getScalarSizeInBits() <
951 Query.Types[1].getScalarSizeInBits();
952 },
954 .clampNumElements(0, v4s16, v8s16)
955 .clampNumElements(0, v2s32, v4s32)
956 .clampMaxNumElements(0, s64, 2)
957 .libcallFor(
958 {{i32, f128}, {i64, f128}, {i128, f128}, {i128, f32}, {i128, f64}});
959
960 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
961 .legalFor({{i32, f32},
962 {i64, f32},
963 {i32, f64},
964 {i64, f64},
965 {v2i32, v2f32},
966 {v4i32, v4f32},
967 {v2i64, v2f64}})
968 .legalFor(
969 HasFP16,
970 {{i16, f16}, {i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
971 // Handle types larger than i64 by scalarizing/lowering.
972 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
974 // The range of a fp16 value fits into an i17, so we can lower the width
975 // to i64.
977 [=](const LegalityQuery &Query) {
978 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
979 },
980 changeTo(0, i64))
981 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
983 .widenScalarToNextPow2(0, /*MinSize=*/32)
984 .minScalar(0, s32)
985 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
987 [=](const LegalityQuery &Query) {
988 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
989 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
990 ITySize > Query.Types[1].getScalarSizeInBits();
991 },
993 .widenScalarIf(
994 [=](const LegalityQuery &Query) {
995 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
996 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
997 Query.Types[0].getScalarSizeInBits() < FTySize;
998 },
1001 .clampNumElements(0, v4s16, v8s16)
1002 .clampNumElements(0, v2s32, v4s32)
1003 .clampMaxNumElements(0, s64, 2);
1004
1005 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
1006 .legalFor({{f32, i32},
1007 {f64, i32},
1008 {f32, i64},
1009 {f64, i64},
1010 {v2f32, v2i32},
1011 {v4f32, v4i32},
1012 {v2f64, v2i64}})
1013 .legalFor(HasFP16,
1014 {{f16, i32}, {f16, i64}, {v4f16, v4i16}, {v8f16, v8i16}})
1015 .unsupportedIf([&](const LegalityQuery &Query) {
1016 return Query.Types[0].getScalarType().isBFloat16();
1017 })
1018 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
1022 .minScalar(1, f32)
1023 .lowerIf([](const LegalityQuery &Query) {
1024 return Query.Types[1].isVector() &&
1025 Query.Types[1].getScalarSizeInBits() == 64 &&
1026 Query.Types[0].getScalarSizeInBits() == 16;
1027 })
1028 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
1029 .scalarizeIf(
1030 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
1031 [](const LegalityQuery &Query) {
1032 return Query.Types[0].getScalarSizeInBits() == 32 &&
1033 Query.Types[1].getScalarSizeInBits() == 64;
1034 },
1035 0)
1036 .widenScalarIf(
1037 [](const LegalityQuery &Query) {
1038 return Query.Types[1].getScalarSizeInBits() <= 64 &&
1039 Query.Types[0].getScalarSizeInBits() <
1040 Query.Types[1].getScalarSizeInBits();
1041 },
1043 .widenScalarIf(
1044 [](const LegalityQuery &Query) {
1045 return Query.Types[0].getScalarSizeInBits() <= 64 &&
1046 Query.Types[0].getScalarSizeInBits() >
1047 Query.Types[1].getScalarSizeInBits();
1048 },
1050 .clampNumElements(0, v4s16, v8s16)
1051 .clampNumElements(0, v2s32, v4s32)
1052 .clampMaxNumElements(0, s64, 2)
1053 .libcallFor({{f16, i128},
1054 {f32, i128},
1055 {f64, i128},
1056 {f128, i128},
1057 {f128, i32},
1058 {f128, i64}});
1059
1060 // Control-flow
1061 getActionDefinitionsBuilder(G_BR).alwaysLegal();
1062 getActionDefinitionsBuilder(G_BRCOND)
1063 .legalFor({s32})
1064 .clampScalar(0, s32, s32);
1065 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
1066
1067 getActionDefinitionsBuilder(G_SELECT)
1068 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1069 .widenScalarToNextPow2(0)
1070 .clampScalar(0, s32, s64)
1071 .clampScalar(1, s32, s32)
1074 .lowerIf(isVector(0));
1075
1076 // Pointer-handling
1077 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1078
1079 if (TM.getCodeModel() == CodeModel::Small)
1080 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1081 else
1082 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1083
1084 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1085 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1086
1087 getActionDefinitionsBuilder(G_PTRTOINT)
1088 .legalFor({{i64, p0}, {v2i64, v2p0}})
1089 .widenScalarToNextPow2(0, 64)
1090 .clampScalar(0, s64, s64)
1091 .clampMaxNumElements(0, s64, 2);
1092
1093 getActionDefinitionsBuilder(G_INTTOPTR)
1094 .unsupportedIf([&](const LegalityQuery &Query) {
1095 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1096 })
1097 .legalFor({{p0, i64}, {v2p0, v2i64}})
1098 .clampMaxNumElements(1, s64, 2);
1099
1100 // Casts for 32 and 64-bit width type are just copies.
1101 // Same for 128-bit width type, except they are on the FPR bank.
1102 getActionDefinitionsBuilder(G_BITCAST)
1104 // Keeping 32-bit instructions legal to prevent regression in some tests
1105 .legalForCartesianProduct({s32, v2s16, v4s8})
1106 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1107 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1108 .customIf([=](const LegalityQuery &Query) {
1109 // Handle casts from i1 vectors to scalars.
1110 LLT DstTy = Query.Types[0];
1111 LLT SrcTy = Query.Types[1];
1112 return DstTy.isScalar() && SrcTy.isVector() &&
1113 SrcTy.getScalarSizeInBits() == 1;
1114 })
1115 .lowerIf([=](const LegalityQuery &Query) {
1116 return Query.Types[0].isVector() != Query.Types[1].isVector();
1117 })
1119 .clampNumElements(0, v8s8, v16s8)
1120 .clampNumElements(0, v4s16, v8s16)
1121 .clampNumElements(0, v2s32, v4s32)
1122 .clampMaxNumElements(0, s64, 2)
1123 .lower();
1124
1125 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1126
1127 // va_list must be a pointer, but most sized types are pretty easy to handle
1128 // as the destination.
1129 getActionDefinitionsBuilder(G_VAARG)
1130 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1131 .clampScalar(0, s8, s64)
1132 .widenScalarToNextPow2(0, /*Min*/ 8);
1133
1134 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1135 .lowerIf(
1136 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1137
1138 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1139
1140 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1141 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1142 .customFor(!UseOutlineAtomics, {{s128, p0}})
1143 .libcallFor(UseOutlineAtomics,
1144 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1145 .clampScalar(0, s32, s64);
1146
1147 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1148 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1149 G_ATOMICRMW_XOR})
1150 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1151 .libcallFor(UseOutlineAtomics,
1152 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1153 .clampScalar(0, s32, s64);
1154
1155 // Do not outline these atomics operations, as per comment in
1156 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1157 getActionDefinitionsBuilder(
1158 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1159 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1160 .clampScalar(0, s32, s64);
1161
1162 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1163
1164 // Merge/Unmerge
1165 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1166 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1167 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1168 getActionDefinitionsBuilder(Op)
1169 .widenScalarToNextPow2(LitTyIdx, 8)
1170 .widenScalarToNextPow2(BigTyIdx, 32)
1171 .clampScalar(LitTyIdx, s8, s64)
1172 .clampScalar(BigTyIdx, s32, s128)
1173 .legalIf([=](const LegalityQuery &Q) {
1174 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1175 case 32:
1176 case 64:
1177 case 128:
1178 break;
1179 default:
1180 return false;
1181 }
1182 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1183 case 8:
1184 case 16:
1185 case 32:
1186 case 64:
1187 return true;
1188 default:
1189 return false;
1190 }
1191 });
1192 }
1193
1194 // TODO : nxv4s16, nxv2s16, nxv2s32
1195 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1196 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1197 {s16, nxv8s16, s64},
1198 {s32, nxv4s32, s64},
1199 {s64, nxv2s64, s64}})
1200 .unsupportedIf([=](const LegalityQuery &Query) {
1201 const LLT &EltTy = Query.Types[1].getElementType();
1202 if (Query.Types[1].isScalableVector())
1203 return false;
1204 return Query.Types[0] != EltTy;
1205 })
1206 .minScalar(2, s64)
1207 .customIf([=](const LegalityQuery &Query) {
1208 const LLT &VecTy = Query.Types[1];
1209 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1210 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1211 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1212 })
1213 .minScalarOrEltIf(
1214 [=](const LegalityQuery &Query) {
1215 // We want to promote to <M x s1> to <M x s64> if that wouldn't
1216 // cause the total vec size to be > 128b.
1217 return Query.Types[1].isFixedVector() &&
1218 Query.Types[1].getNumElements() <= 2;
1219 },
1220 0, s64)
1221 .minScalarOrEltIf(
1222 [=](const LegalityQuery &Query) {
1223 return Query.Types[1].isFixedVector() &&
1224 Query.Types[1].getNumElements() <= 4;
1225 },
1226 0, s32)
1227 .minScalarOrEltIf(
1228 [=](const LegalityQuery &Query) {
1229 return Query.Types[1].isFixedVector() &&
1230 Query.Types[1].getNumElements() <= 8;
1231 },
1232 0, s16)
1233 .minScalarOrEltIf(
1234 [=](const LegalityQuery &Query) {
1235 return Query.Types[1].isFixedVector() &&
1236 Query.Types[1].getNumElements() <= 16;
1237 },
1238 0, s8)
1239 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1240 .moreElementsToNextPow2(1)
1241 .clampMaxNumElements(1, s64, 2)
1242 .clampMaxNumElements(1, s32, 4)
1243 .clampMaxNumElements(1, s16, 8)
1244 .clampMaxNumElements(1, s8, 16)
1245 .clampMaxNumElements(1, p0, 2)
1246 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1);
1247
1248 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1249 .legalIf(
1250 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1251 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1252 {nxv8s16, s32, s64},
1253 {nxv4s32, s32, s64},
1254 {nxv2s64, s64, s64}})
1256 .widenVectorEltsToVectorMinSize(0, 64)
1257 .clampNumElements(0, v8s8, v16s8)
1258 .clampNumElements(0, v4s16, v8s16)
1259 .clampNumElements(0, v2s32, v4s32)
1260 .clampMaxNumElements(0, s64, 2)
1261 .clampMaxNumElements(0, p0, 2)
1262 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
1263
1264 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1265 .legalFor({{v8s8, s8},
1266 {v16s8, s8},
1267 {v4s16, s16},
1268 {v8s16, s16},
1269 {v2s32, s32},
1270 {v4s32, s32},
1271 {v2s64, s64},
1272 {v2p0, p0}})
1273 .clampNumElements(0, v4s32, v4s32)
1274 .clampNumElements(0, v2s64, v2s64)
1275 .minScalarOrElt(0, s8)
1276 .widenVectorEltsToVectorMinSize(0, 64)
1277 .widenScalarOrEltToNextPow2(0)
1278 .minScalarSameAs(1, 0);
1279
1280 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1281
1282 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1283 .legalIf([=](const LegalityQuery &Query) {
1284 const LLT &DstTy = Query.Types[0];
1285 const LLT &SrcTy = Query.Types[1];
1286 // For now just support the TBL2 variant which needs the source vectors
1287 // to be the same size as the dest.
1288 if (DstTy != SrcTy)
1289 return false;
1290 return llvm::is_contained(
1291 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1292 })
1293 .moreElementsIf(
1294 [](const LegalityQuery &Query) {
1295 return Query.Types[0].getNumElements() >
1296 Query.Types[1].getNumElements();
1297 },
1298 changeTo(1, 0))
1300 .moreElementsIf(
1301 [](const LegalityQuery &Query) {
1302 return Query.Types[0].getNumElements() <
1303 Query.Types[1].getNumElements();
1304 },
1305 changeTo(0, 1))
1306 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1307 .clampNumElements(0, v8s8, v16s8)
1308 .clampNumElements(0, v4s16, v8s16)
1309 .clampNumElements(0, v4s32, v4s32)
1310 .clampNumElements(0, v2s64, v2s64)
1311 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
1312 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1313 // Bitcast pointers vector to i64.
1314 const LLT DstTy = Query.Types[0];
1315 return std::pair(
1316 0, LLT::vector(DstTy.getElementCount(), LLT::integer(64)));
1317 });
1318
1319 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1320 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1321 .customIf([=](const LegalityQuery &Query) {
1322 return Query.Types[0].isFixedVector() &&
1323 Query.Types[0].getScalarSizeInBits() < 8;
1324 })
1325 .bitcastIf(
1326 [=](const LegalityQuery &Query) {
1327 return Query.Types[0].isFixedVector() &&
1328 Query.Types[1].isFixedVector() &&
1329 Query.Types[0].getScalarSizeInBits() >= 8 &&
1330 isPowerOf2_64(Query.Types[0].getScalarSizeInBits()) &&
1331 Query.Types[0].getSizeInBits() <= 128 &&
1332 Query.Types[1].getSizeInBits() <= 64;
1333 },
1334 [=](const LegalityQuery &Query) {
1335 const LLT DstTy = Query.Types[0];
1336 const LLT SrcTy = Query.Types[1];
1337 return std::pair(
1338 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1341 SrcTy.getNumElements())));
1342 });
1343
1344 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1345 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1347 .immIdx(0); // Inform verifier imm idx 0 is handled.
1348
1349 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1350 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1351 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1352
1353 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1354
1355 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1356
1357 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1358
1359 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1360
1361 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1362
1363 if (ST.hasMOPS()) {
1364 // G_BZERO is not supported. Currently it is only emitted by
1365 // PreLegalizerCombiner for G_MEMSET with zero constant.
1366 getActionDefinitionsBuilder(G_BZERO).unsupported();
1367
1368 getActionDefinitionsBuilder(G_MEMSET)
1369 .legalForCartesianProduct({p0}, {s64}, {s64})
1370 .customForCartesianProduct({p0}, {s8}, {s64})
1371 .immIdx(0); // Inform verifier imm idx 0 is handled.
1372
1373 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1374 .legalForCartesianProduct({p0}, {p0}, {s64})
1375 .immIdx(0); // Inform verifier imm idx 0 is handled.
1376
1377 // G_MEMCPY_INLINE does not have a tailcall immediate
1378 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1379 .legalForCartesianProduct({p0}, {p0}, {s64});
1380
1381 getActionDefinitionsBuilder(G_MEMSET_INLINE)
1382 .legalForCartesianProduct({p0}, {s64}, {s64})
1383 .customForCartesianProduct({p0}, {s8}, {s64});
1384 } else {
1385 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1386 .libcall();
1387 }
1388
1389 // For fadd reductions we have pairwise operations available. We treat the
1390 // usual legal types as legal and handle the lowering to pairwise instructions
1391 // later.
1392 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1393 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1394 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1395 .minScalarOrElt(0, MinFPScalar)
1396 .clampMaxNumElements(1, s64, 2)
1397 .clampMaxNumElements(1, s32, 4)
1398 .clampMaxNumElements(1, s16, 8)
1399 .moreElementsToNextPow2(1)
1400 .scalarize(1)
1401 .lower();
1402
1403 // For fmul reductions we need to split up into individual operations. We
1404 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1405 // smaller types, followed by scalarizing what remains.
1406 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1407 .minScalarOrElt(0, MinFPScalar)
1408 .clampMaxNumElements(1, s64, 2)
1409 .clampMaxNumElements(1, s32, 4)
1410 .clampMaxNumElements(1, s16, 8)
1411 .clampMaxNumElements(1, s32, 2)
1412 .clampMaxNumElements(1, s16, 4)
1413 .scalarize(1)
1414 .lower();
1415
1416 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1417 .scalarize(2)
1418 .lower();
1419
1420 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1421 .legalFor({{i8, v8i8},
1422 {i8, v16i8},
1423 {i16, v4i16},
1424 {i16, v8i16},
1425 {i32, v2i32},
1426 {i32, v4i32},
1427 {i64, v2i64}})
1429 .clampMaxNumElements(1, s64, 2)
1430 .clampMaxNumElements(1, s32, 4)
1431 .clampMaxNumElements(1, s16, 8)
1432 .clampMaxNumElements(1, s8, 16)
1433 .widenVectorEltsToVectorMinSize(1, 64)
1434 .scalarize(1);
1435
1436 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1437 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1438 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1439 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1440 .minScalarOrElt(0, MinFPScalar)
1441 .clampMaxNumElements(1, s64, 2)
1442 .clampMaxNumElements(1, s32, 4)
1443 .clampMaxNumElements(1, s16, 8)
1444 .scalarize(1)
1445 .lower();
1446
1447 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1448 .clampMaxNumElements(1, s32, 2)
1449 .clampMaxNumElements(1, s16, 4)
1450 .clampMaxNumElements(1, s8, 8)
1451 .scalarize(1)
1452 .lower();
1453
1454 getActionDefinitionsBuilder(
1455 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1456 .legalFor({{i8, v8i8},
1457 {i8, v16i8},
1458 {i16, v4i16},
1459 {i16, v8i16},
1460 {i32, v2i32},
1461 {i32, v4i32}})
1462 .moreElementsIf(
1463 [=](const LegalityQuery &Query) {
1464 return Query.Types[1].isVector() &&
1465 Query.Types[1].getElementType() != s8 &&
1466 Query.Types[1].getNumElements() & 1;
1467 },
1469 .clampMaxNumElements(1, s64, 2)
1470 .clampMaxNumElements(1, s32, 4)
1471 .clampMaxNumElements(1, s16, 8)
1472 .clampMaxNumElements(1, s8, 16)
1473 .scalarize(1)
1474 .lower();
1475
1476 getActionDefinitionsBuilder(
1477 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1478 // Try to break down into smaller vectors as long as they're at least 64
1479 // bits. This lets us use vector operations for some parts of the
1480 // reduction.
1481 .fewerElementsIf(
1482 [=](const LegalityQuery &Q) {
1483 LLT SrcTy = Q.Types[1];
1484 if (SrcTy.isScalar())
1485 return false;
1486 if (!isPowerOf2_32(SrcTy.getNumElements()))
1487 return false;
1488 // We can usually perform 64b vector operations.
1489 return SrcTy.getSizeInBits() > 64;
1490 },
1491 [=](const LegalityQuery &Q) {
1492 LLT SrcTy = Q.Types[1];
1493 return std::make_pair(1, SrcTy.divide(2));
1494 })
1495 .scalarize(1)
1496 .lower();
1497
1498 // TODO: Update this to correct handling when adding AArch64/SVE support.
1499 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1500
1501 // Access to floating-point environment.
1502 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1503 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1504 .libcall();
1505
1506 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1507
1508 getActionDefinitionsBuilder(G_PREFETCH).custom();
1509
1510 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1511
1512 getActionDefinitionsBuilder({G_INTRINSIC, G_INTRINSIC_W_SIDE_EFFECTS})
1513 .alwaysLegal();
1514 getActionDefinitionsBuilder(G_FENCE).alwaysLegal();
1515 getActionDefinitionsBuilder(G_INVOKE_REGION_START).alwaysLegal();
1516
1517 getLegacyLegalizerInfo().computeTables();
1518 verify(*ST.getInstrInfo());
1519}
1520
// NOTE(review): the lines that open this definition (return type, name and
// the leading parameters) do not appear in this listing. The body reads
// `Helper` and `MI`, so they are presumably declared on those lines.
//
// Routes an instruction to an opcode-specific legalization routine chosen by
// MI.getOpcode() and returns that routine's result. Opcodes without a case
// fall into `default` and yield false.
1523 LostDebugLocObserver &LocObserver) const {
// The builder and change observer come from the helper; the register info is
// obtained from the builder. They are forwarded to the routines below.
1524 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1525 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1526 GISelChangeObserver &Observer = Helper.Observer;
1527 switch (MI.getOpcode()) {
1528 default:
1529 // No idea what to do.
1530 return false;
1531 case TargetOpcode::G_VAARG:
1532 return legalizeVaArg(MI, MRI, MIRBuilder);
1533 case TargetOpcode::G_LOAD:
1534 case TargetOpcode::G_STORE:
1535 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
// All three shift opcodes share a single routine.
1536 case TargetOpcode::G_SHL:
1537 case TargetOpcode::G_ASHR:
1538 case TargetOpcode::G_LSHR:
1539 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1540 case TargetOpcode::G_GLOBAL_VALUE:
1541 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1542 case TargetOpcode::G_SBFX:
1543 case TargetOpcode::G_UBFX:
1544 return legalizeBitfieldExtract(MI, MRI, Helper);
1545 case TargetOpcode::G_FSHL:
1546 case TargetOpcode::G_FSHR:
1547 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1548 case TargetOpcode::G_ROTR:
1549 return legalizeRotate(MI, MRI, Helper);
1550 case TargetOpcode::G_CTPOP:
1551 return legalizeCTPOP(MI, MRI, Helper);
1552 case TargetOpcode::G_ATOMIC_CMPXCHG:
1553 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1554 case TargetOpcode::G_CTTZ:
1555 return legalizeCTTZ(MI, Helper);
// The memory-operation opcodes below all go through one routine.
1556 case TargetOpcode::G_BZERO:
1557 case TargetOpcode::G_MEMCPY:
1558 case TargetOpcode::G_MEMMOVE:
1559 case TargetOpcode::G_MEMSET:
1560 case TargetOpcode::G_MEMSET_INLINE:
1561 return legalizeMemOps(MI, Helper);
1562 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1563 return legalizeExtractVectorElt(MI, MRI, Helper);
1564 case TargetOpcode::G_DYN_STACKALLOC:
1565 return legalizeDynStackAlloc(MI, Helper);
1566 case TargetOpcode::G_PREFETCH:
1567 return legalizePrefetch(MI, Helper);
// G_ABS is handed straight to a LegalizerHelper lowering instead of a routine
// of this class.
1568 case TargetOpcode::G_ABS:
1569 return Helper.lowerAbsToCNeg(MI);
1570 case TargetOpcode::G_ICMP:
1571 return legalizeICMP(MI, MRI, MIRBuilder);
1572 case TargetOpcode::G_BITCAST:
1573 return legalizeBitcast(MI, Helper);
1574 case TargetOpcode::G_CONCAT_VECTORS:
1575 return legalizeConcatVectors(MI, MRI, MIRBuilder);
1576 case TargetOpcode::G_FPTRUNC:
1577 // In order to lower f16 to f64 properly, we need to use f32 as an
1578 // intermediary
// NOTE(review): G_FPTRUNC narrows, so the comment above presumably means the
// f64 -> f16 direction -- confirm against legalizeFptrunc.
1579 return legalizeFptrunc(MI, MIRBuilder, MRI);
1580 }
1581
// Every path through the switch returns, so control never gets here.
1582 llvm_unreachable("expected switch to return");
1583}
1584
1585bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1586 LegalizerHelper &Helper) const {
1587 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1588 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1589 // We're trying to handle casts from i1 vectors to scalars but reloading from
1590 // stack.
1591 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1592 SrcTy.getElementType() != LLT::scalar(1))
1593 return false;
1594
1595 Helper.createStackStoreLoad(DstReg, SrcReg);
1596 MI.eraseFromParent();
1597 return true;
1598}
1599
// Custom legalization for G_FSHL / G_FSHR.
//
// Shift amounts that are not constants, or that are a multiple of the operation
// width, are handed to Helper.lowerFunnelShiftAsShifts. Any other constant
// amount is reduced modulo the operation width and the instruction ends up as
// a G_FSHR: a G_FSHR keeps its place and (unless it already has a 64-bit
// in-range amount) gets a new 64-bit constant as operand 3, while a G_FSHL is
// erased and replaced by a G_FSHR shifting by (width - amount).
//
// NOTE(review): the body uses `MRI`, which is not among the parameters shown;
// the listing appears to have dropped that parameter line.
1600bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1602 MachineIRBuilder &MIRBuilder,
1603 GISelChangeObserver &Observer,
1604 LegalizerHelper &Helper) const {
1605 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1606 MI.getOpcode() == TargetOpcode::G_FSHR);
1607
1608 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1609 // lowering
// Operand 3 is the shift amount.
1610 Register ShiftNo = MI.getOperand(3).getReg();
1611 LLT ShiftTy = MRI.getType(ShiftNo);
1612 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1613
1614 // Adjust shift amount according to Opcode (FSHL/FSHR)
1615 // Convert FSHL to FSHR
1616 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
// The operation width, expressed as an APInt as wide as the shift-amount type
// so it can be used in arithmetic with the constant amount.
1617 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1618
1619 // Lower non-constant shifts and leave zero shifts to the optimizer.
1620 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
// NOTE(review): the right-hand side of this comparison is missing from the
// listing (the following line is absent).
1621 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1623
1624 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1625
// For G_FSHL the amount becomes (width - amount) so it can be used with G_FSHR.
1626 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1627
1628 // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
1629 // in the range of 0 <-> BitWidth, it is legal
1630 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1631 VRegAndVal->Value.ult(BitWidth))
1632 return true;
1633
1634 // Cast the ShiftNumber to a 64-bit type
1635 auto Cast64 = MIRBuilder.buildConstant(LLT::integer(64), Amount.zext(64));
1636
1637 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
// Rewrite the amount operand in place, bracketed by observer notifications.
1638 Observer.changingInstr(MI);
1639 MI.getOperand(3).setReg(Cast64.getReg(0));
1640 Observer.changedInstr(MI);
1641 }
1642 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1643 // instruction
1644 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1645 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1646 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1647 Cast64.getReg(0)});
1648 MI.eraseFromParent();
1649 }
1650 return true;
1651}
1652
// Custom legalization for G_ICMP.
//
// Returns false when the destination and source types differ in scalar size or
// element count, or when the destination is not 64 or 128 bits in total.
// Otherwise returns true: predicates other than ICMP_NE leave the instruction
// untouched, while an ICMP_NE compare is replaced by an ICMP_EQ compare
// followed by a NOT that writes the original destination register.
//
// NOTE(review): the body uses `MRI`, which is not among the parameters shown;
// the listing appears to have dropped that parameter line.
1653bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1655 MachineIRBuilder &MIRBuilder) const {
// Operand 0 is the result, operand 1 the predicate, operands 2 and 3 the
// values being compared.
1656 Register DstReg = MI.getOperand(0).getReg();
1657 Register SrcReg1 = MI.getOperand(2).getReg();
1658 Register SrcReg2 = MI.getOperand(3).getReg();
1659 LLT DstTy = MRI.getType(DstReg);
1660 LLT SrcTy = MRI.getType(SrcReg1);
1661
1662 // Check the vector types are legal
1663 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1664 DstTy.getNumElements() != SrcTy.getNumElements() ||
1665 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1666 return false;
1667
1668 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1669 // following passes
1670 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
// Anything other than NE needs no change.
1671 if (Pred != CmpInst::ICMP_NE)
1672 return true;
// Build the EQ compare into a fresh register of the destination type...
1673 Register CmpReg =
1674 MIRBuilder
1675 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1676 .getReg(0);
// ...then invert it into the original destination.
1677 MIRBuilder.buildNot(DstReg, CmpReg);
1678
1679 MI.eraseFromParent();
1680 return true;
1681}
1682
// Custom legalization for a rotate: zero-extends the rotate amount (operand 2)
// to a 64-bit integer and rewrites the operand to use the extended value.
// Always returns true.
//
// NOTE(review): the body uses `MRI`, which is not among the parameters shown;
// the listing appears to have dropped that parameter line.
1683bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1685 LegalizerHelper &Helper) const {
1686 // To allow for imported patterns to match, we ensure that the rotate amount
1687 // is 64b with an extension.
1688 Register AmtReg = MI.getOperand(2).getReg();
1689 LLT AmtTy = MRI.getType(AmtReg);
// AmtTy is otherwise read only inside the asserts below; the void cast marks
// it as used for builds where those asserts compile away.
1690 (void)AmtTy;
1691 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1692 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1693 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::integer(64), AmtReg);
// Swap in the widened amount, bracketed by observer notifications.
1694 Helper.Observer.changingInstr(MI);
1695 MI.getOperand(2).setReg(NewAmt.getReg(0));
1696 Helper.Observer.changedInstr(MI);
1697 return true;
1698}
1699
// Custom legalization for G_GLOBAL_VALUE: replaces it with an ADRP followed by
// a G_ADD_LOW (plus a MOVK in between for MO_TAGGED globals).
//
// The instruction is left untouched, and true is returned, when the operand is
// a symbol, the global is thread-local, or the reference is classified as
// MO_GOT. In every other case the replacement sequence is built, the original
// instruction is erased, and true is returned.
//
// NOTE(review): the body uses `MI`, `MRI` and `MIRBuilder`, which are not among
// the parameters shown; the listing appears to have dropped that parameter
// line.
1700bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1702 GISelChangeObserver &Observer) const {
1703 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1704 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1705 // G_ADD_LOW instructions.
1706 // By splitting this here, we can optimize accesses in the small code model by
1707 // folding in the G_ADD_LOW into the load/store offset.
1708 auto &GlobalOp = MI.getOperand(1);
1709 // Don't modify an intrinsic call.
1710 if (GlobalOp.isSymbol())
1711 return true;
1712 const auto* GV = GlobalOp.getGlobal();
1713 if (GV->isThreadLocal())
1714 return true; // Don't want to modify TLS vars.
1715
1716 auto &TM = ST->getTargetLowering()->getTargetMachine();
1717 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1718
// References classified as MO_GOT are not split here.
1719 if (OpFlags & AArch64II::MO_GOT)
1720 return true;
1721
1722 auto Offset = GlobalOp.getOffset();
1723 Register DstReg = MI.getOperand(0).getReg();
// ADRP carries the global's address with the MO_PAGE flag added.
1724 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1725 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1726 // Set the regclass on the dest reg too.
1727 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1728
1729 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1730 // by creating a MOVK that sets bits 48-63 of the register to (global address
1731 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1732 // prevent an incorrect tag being generated during relocation when the
1733 // global appears before the code section. Without the offset, a global at
1734 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1735 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1736 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1737 // instead of `0xf`.
1738 // This assumes that we're in the small code model so we can assume a binary
1739 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1740 // binary must also be loaded into address range [0, 2^48). Both of these
1741 // properties need to be ensured at runtime when using tagged addresses.
1742 if (OpFlags & AArch64II::MO_TAGGED) {
1743 assert(!Offset &&
1744 "Should not have folded in an offset for a tagged global!");
// The MOVK consumes the ADRP result and takes over the `ADRP` variable, so the
// G_ADD_LOW below is fed by the MOVK in the tagged case.
// NOTE(review): the target-flags argument of addGlobalAddress is missing from
// the listing (the line after the 0x100000000 offset is absent).
1745 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1746 .addGlobalAddress(GV, 0x100000000,
1748 .addImm(48);
1749 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1750 }
1751
// G_ADD_LOW writes the original destination register.
// NOTE(review): the target-flags argument of addGlobalAddress is missing from
// the listing here as well.
1752 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1753 .addGlobalAddress(GV, Offset,
1755 MI.eraseFromParent();
1756 return true;
1757}
1758
1760 MachineInstr &MI) const {
1761 MachineIRBuilder &MIB = Helper.MIRBuilder;
1762 MachineRegisterInfo &MRI = *MIB.getMRI();
1763
1764 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1765 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1766 MI.eraseFromParent();
1767 return true;
1768 };
1769 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1770 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1771 {MI.getOperand(2), MI.getOperand(3)});
1772 MI.eraseFromParent();
1773 return true;
1774 };
1775 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1776 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1777 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1778 MI.eraseFromParent();
1779 return true;
1780 };
1781
1782 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1783 switch (IntrinsicID) {
1784 case Intrinsic::vacopy: {
1785 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1786 unsigned VaListSize =
1787 (ST->isTargetDarwin() || ST->isTargetWindows())
1788 ? PtrSize
1789 : ST->isTargetILP32() ? 20 : 32;
1790
1791 MachineFunction &MF = *MI.getMF();
1793 LLT::scalar(VaListSize * 8));
1794 MIB.buildLoad(Val, MI.getOperand(2),
1797 VaListSize, Align(PtrSize)));
1798 MIB.buildStore(Val, MI.getOperand(1),
1801 VaListSize, Align(PtrSize)));
1802 MI.eraseFromParent();
1803 return true;
1804 }
1805 case Intrinsic::get_dynamic_area_offset: {
1806 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1807 MI.eraseFromParent();
1808 return true;
1809 }
1810 case Intrinsic::aarch64_mops_memset_tag: {
1811 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1812 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1813 // the instruction).
1814 auto &Value = MI.getOperand(3);
1815 Register ExtValueReg = MIB.buildAnyExt(LLT::integer(64), Value).getReg(0);
1816 Value.setReg(ExtValueReg);
1817 return true;
1818 }
1819 case Intrinsic::aarch64_prefetch: {
1820 auto &AddrVal = MI.getOperand(1);
1821
1822 int64_t IsWrite = MI.getOperand(2).getImm();
1823 int64_t Target = MI.getOperand(3).getImm();
1824 int64_t IsStream = MI.getOperand(4).getImm();
1825 int64_t IsData = MI.getOperand(5).getImm();
1826
1827 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1828 (!IsData << 3) | // IsDataCache bit
1829 (Target << 1) | // Cache level bits
1830 (unsigned)IsStream; // Stream bit
1831
1832 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1833 MI.eraseFromParent();
1834 return true;
1835 }
1836 case Intrinsic::aarch64_range_prefetch: {
1837 auto &AddrVal = MI.getOperand(1);
1838
1839 int64_t IsWrite = MI.getOperand(2).getImm();
1840 int64_t IsStream = MI.getOperand(3).getImm();
1841 unsigned PrfOp = (IsStream << 2) | IsWrite;
1842
1843 MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
1844 .addImm(PrfOp)
1845 .add(AddrVal)
1846 .addUse(MI.getOperand(4).getReg()); // Metadata
1847 MI.eraseFromParent();
1848 return true;
1849 }
1850 case Intrinsic::aarch64_prefetch_ir: {
1851 auto &AddrVal = MI.getOperand(1);
1852 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(24).add(AddrVal);
1853 MI.eraseFromParent();
1854 return true;
1855 }
1856 case Intrinsic::aarch64_neon_uaddv:
1857 case Intrinsic::aarch64_neon_saddv:
1858 case Intrinsic::aarch64_neon_umaxv:
1859 case Intrinsic::aarch64_neon_smaxv:
1860 case Intrinsic::aarch64_neon_uminv:
1861 case Intrinsic::aarch64_neon_sminv: {
1862 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1863 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1864 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1865
1866 auto OldDst = MI.getOperand(0).getReg();
1867 auto OldDstTy = MRI.getType(OldDst);
1868 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1869 if (OldDstTy == NewDstTy)
1870 return true;
1871
1872 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1873
1874 Helper.Observer.changingInstr(MI);
1875 MI.getOperand(0).setReg(NewDst);
1876 Helper.Observer.changedInstr(MI);
1877
1878 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1879 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1880 OldDst, NewDst);
1881
1882 return true;
1883 }
1884 case Intrinsic::aarch64_neon_uaddlp:
1885 case Intrinsic::aarch64_neon_saddlp: {
1886 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1887 ? AArch64::G_UADDLP
1888 : AArch64::G_SADDLP;
1889 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1890 MI.eraseFromParent();
1891
1892 return true;
1893 }
1894 case Intrinsic::aarch64_neon_uaddlv:
1895 case Intrinsic::aarch64_neon_saddlv: {
1896 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1897 ? AArch64::G_UADDLV
1898 : AArch64::G_SADDLV;
1899 Register DstReg = MI.getOperand(0).getReg();
1900 Register SrcReg = MI.getOperand(2).getReg();
1901 LLT DstTy = MRI.getType(DstReg);
1902
1903 LLT MidTy, ExtTy;
1904 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1905 ExtTy = LLT::integer(32);
1906 MidTy = LLT::fixed_vector(4, ExtTy);
1907 } else {
1908 ExtTy = LLT::integer(64);
1909 MidTy = LLT::fixed_vector(2, ExtTy);
1910 }
1911
1912 Register MidReg =
1913 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1914 Register ZeroReg =
1915 MIB.buildConstant(LLT::integer(64), 0)->getOperand(0).getReg();
1916 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1917 {MidReg, ZeroReg})
1918 .getReg(0);
1919
1920 if (DstTy.getScalarSizeInBits() < 32)
1921 MIB.buildTrunc(DstReg, ExtReg);
1922 else
1923 MIB.buildCopy(DstReg, ExtReg);
1924
1925 MI.eraseFromParent();
1926
1927 return true;
1928 }
1929 case Intrinsic::aarch64_neon_smax:
1930 return LowerBinOp(TargetOpcode::G_SMAX);
1931 case Intrinsic::aarch64_neon_smin:
1932 return LowerBinOp(TargetOpcode::G_SMIN);
1933 case Intrinsic::aarch64_neon_umax:
1934 return LowerBinOp(TargetOpcode::G_UMAX);
1935 case Intrinsic::aarch64_neon_umin:
1936 return LowerBinOp(TargetOpcode::G_UMIN);
1937 case Intrinsic::aarch64_neon_fmax:
1938 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1939 case Intrinsic::aarch64_neon_fmin:
1940 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1941 case Intrinsic::aarch64_neon_fmaxnm:
1942 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1943 case Intrinsic::aarch64_neon_fminnm:
1944 return LowerBinOp(TargetOpcode::G_FMINNUM);
1945 case Intrinsic::aarch64_neon_pmul:
1946 return LowerBinOp(TargetOpcode::G_CLMUL);
1947 case Intrinsic::aarch64_neon_pmull:
1948 case Intrinsic::aarch64_neon_pmull64:
1949 return LowerBinOp(AArch64::G_PMULL);
1950 case Intrinsic::aarch64_neon_smull:
1951 return LowerBinOp(AArch64::G_SMULL);
1952 case Intrinsic::aarch64_neon_umull:
1953 return LowerBinOp(AArch64::G_UMULL);
1954 case Intrinsic::aarch64_neon_sabd:
1955 return LowerBinOp(TargetOpcode::G_ABDS);
1956 case Intrinsic::aarch64_neon_uabd:
1957 return LowerBinOp(TargetOpcode::G_ABDU);
1958 case Intrinsic::aarch64_neon_uhadd:
1959 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1960 case Intrinsic::aarch64_neon_urhadd:
1961 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1962 case Intrinsic::aarch64_neon_shadd:
1963 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1964 case Intrinsic::aarch64_neon_srhadd:
1965 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1966 case Intrinsic::aarch64_neon_sqshrn: {
1967 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1968 return true;
1969 // Create right shift instruction. Store the output register in Shr.
1970 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1971 {MRI.getType(MI.getOperand(2).getReg())},
1972 {MI.getOperand(2), MI.getOperand(3).getImm()});
1973 // Build the narrow intrinsic, taking in Shr.
1974 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1975 MI.eraseFromParent();
1976 return true;
1977 }
1978 case Intrinsic::aarch64_neon_sqshrun: {
1979 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1980 return true;
1981 // Create right shift instruction. Store the output register in Shr.
1982 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1983 {MRI.getType(MI.getOperand(2).getReg())},
1984 {MI.getOperand(2), MI.getOperand(3).getImm()});
1985 // Build the narrow intrinsic, taking in Shr.
1986 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1987 MI.eraseFromParent();
1988 return true;
1989 }
1990 case Intrinsic::aarch64_neon_sqrshrn: {
1991 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1992 return true;
1993 // Create right shift instruction. Store the output register in Shr.
1994 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1995 {MRI.getType(MI.getOperand(2).getReg())},
1996 {MI.getOperand(2), MI.getOperand(3).getImm()});
1997 // Build the narrow intrinsic, taking in Shr.
1998 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1999 MI.eraseFromParent();
2000 return true;
2001 }
2002 case Intrinsic::aarch64_neon_sqrshrun: {
2003 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2004 return true;
2005 // Create right shift instruction. Store the output register in Shr.
2006 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
2007 {MRI.getType(MI.getOperand(2).getReg())},
2008 {MI.getOperand(2), MI.getOperand(3).getImm()});
2009 // Build the narrow intrinsic, taking in Shr.
2010 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
2011 MI.eraseFromParent();
2012 return true;
2013 }
2014 case Intrinsic::aarch64_neon_uqrshrn: {
2015 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2016 return true;
2017 // Create right shift instruction. Store the output register in Shr.
2018 auto Shr = MIB.buildInstr(AArch64::G_URSHR_I,
2019 {MRI.getType(MI.getOperand(2).getReg())},
2020 {MI.getOperand(2), MI.getOperand(3).getImm()});
2021 // Build the narrow intrinsic, taking in Shr.
2022 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2023 MI.eraseFromParent();
2024 return true;
2025 }
2026 case Intrinsic::aarch64_neon_uqshrn: {
2027 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2028 return true;
2029 // Create right shift instruction. Store the output register in Shr.
2030 auto Shr = MIB.buildInstr(AArch64::G_VLSHR,
2031 {MRI.getType(MI.getOperand(2).getReg())},
2032 {MI.getOperand(2), MI.getOperand(3).getImm()});
2033 // Build the narrow intrinsic, taking in Shr.
2034 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2035 MI.eraseFromParent();
2036 return true;
2037 }
2038 case Intrinsic::aarch64_neon_sqshlu: {
2039 // Check if last operand is constant vector dup
2040 auto ShiftAmount = isConstantOrConstantSplatVector(
2041 *MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);
2042 if (ShiftAmount) {
2043 // If so, create a new intrinsic with the correct shift amount
2044 MIB.buildInstr(AArch64::G_SQSHLU_I, {MI.getOperand(0)},
2045 {MI.getOperand(2)})
2046 .addImm(ShiftAmount->getSExtValue());
2047 MI.eraseFromParent();
2048 return true;
2049 }
2050 return false;
2051 }
2052 case Intrinsic::aarch64_neon_vsli: {
2053 MIB.buildInstr(
2054 AArch64::G_SLI, {MI.getOperand(0)},
2055 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2056 MI.eraseFromParent();
2057 break;
2058 }
2059 case Intrinsic::aarch64_neon_vsri: {
2060 MIB.buildInstr(
2061 AArch64::G_SRI, {MI.getOperand(0)},
2062 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2063 MI.eraseFromParent();
2064 break;
2065 }
2066 case Intrinsic::aarch64_neon_abs: {
2067 // Lower the intrinsic to G_ABS.
2068 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
2069 MI.eraseFromParent();
2070 return true;
2071 }
2072 case Intrinsic::aarch64_neon_sqadd: {
2073 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2074 return LowerBinOp(TargetOpcode::G_SADDSAT);
2075 break;
2076 }
2077 case Intrinsic::aarch64_neon_sqsub: {
2078 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2079 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2080 break;
2081 }
2082 case Intrinsic::aarch64_neon_uqadd: {
2083 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2084 return LowerBinOp(TargetOpcode::G_UADDSAT);
2085 break;
2086 }
2087 case Intrinsic::aarch64_neon_uqsub: {
2088 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2089 return LowerBinOp(TargetOpcode::G_USUBSAT);
2090 break;
2091 }
2092 case Intrinsic::aarch64_neon_udot:
2093 return LowerTriOp(AArch64::G_UDOT);
2094 case Intrinsic::aarch64_neon_sdot:
2095 return LowerTriOp(AArch64::G_SDOT);
2096 case Intrinsic::aarch64_neon_usdot:
2097 return LowerTriOp(AArch64::G_USDOT);
2098 case Intrinsic::aarch64_neon_sqxtn:
2099 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2100 case Intrinsic::aarch64_neon_sqxtun:
2101 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2102 case Intrinsic::aarch64_neon_uqxtn:
2103 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2104 case Intrinsic::aarch64_neon_fcvtzu:
2105 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2106 case Intrinsic::aarch64_neon_fcvtzs:
2107 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2108 case Intrinsic::aarch64_neon_cls:
2109 return LowerUnaryOp(TargetOpcode::G_CTLS);
2110
2111 case Intrinsic::vector_reverse:
2112 // TODO: Add support for vector_reverse
2113 return false;
2114 }
2115
2116 return true;
2117}
2118
/// Custom legalization for G_SHL / G_ASHR / G_LSHR.
///
/// When the shift amount (operand 2) is a constant no greater than 31, the
/// operand is replaced by a newly built constant whose element size is 64
/// bits. In every other case the instruction is left untouched. Always returns
/// true.
2119bool AArch64LegalizerInfo::legalizeShlAshrLshr(
     // NOTE(review): listing line 2120 is missing from this extract; MI, MRI
     // and MIRBuilder used below are presumably declared there - confirm
     // against the upstream file.
2121 GISelChangeObserver &Observer) const {
2122 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
2123 MI.getOpcode() == TargetOpcode::G_LSHR ||
2124 MI.getOpcode() == TargetOpcode::G_SHL);
2125 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
2126 // imported patterns can select it later. Either way, it will be legal.
2127 Register AmtReg = MI.getOperand(2).getReg();
2128 LLT AmtRegEltTy = MRI.getType(AmtReg).getScalarType();
2129 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
     // Non-constant shift amount: nothing to rewrite.
2130 if (!VRegAndVal)
2131 return true;
2132 // Check the shift amount is in range for an immediate form.
2133 int64_t Amount = VRegAndVal->Value.getSExtValue();
2134 if (Amount > 31)
2135 return true; // This will have to remain a register variant.
2136 auto ExtCst =
2137 MIRBuilder.buildConstant(AmtRegEltTy.changeElementSize(64), Amount);
     // MI is modified in place, so bracket the operand rewrite with observer
     // notifications.
2138 Observer.changingInstr(MI);
2139 MI.getOperand(2).setReg(ExtCst.getReg(0));
2140 Observer.changedInstr(MI);
2141 return true;
2142}
2143
2145 MachineRegisterInfo &MRI) {
2146 Base = Root;
2147 Offset = 0;
2148
2149 Register NewBase;
2150 int64_t NewOffset;
2151 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
2152 isShiftedInt<7, 3>(NewOffset)) {
2153 Base = NewBase;
2154 Offset = NewOffset;
2155 }
2156}
2157
2158// FIXME: This should be removed and replaced with the generic bitcast legalize
2159// action.
/// Custom legalization for G_LOAD / G_STORE.
///
/// Two value types are handled:
///  - LLT::scalar(128): the access is replaced by a paired target instruction
///    (LDIAPPX/STILPX or LDPXi/STPXi) operating on two 64-bit halves.
///  - vectors of address-space-0 pointers: the access is re-issued on an
///    integer vector of the same shape, with a bitcast to or from the original
///    value.
/// Any other value type returns false. On success MI is erased and true is
/// returned.
2160bool AArch64LegalizerInfo::legalizeLoadStore(
     // NOTE(review): listing line 2161 is missing from this extract; MI, MRI
     // and MIRBuilder used below are presumably declared there - confirm
     // against the upstream file.
2162 GISelChangeObserver &Observer) const {
2163 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
2164 MI.getOpcode() == TargetOpcode::G_LOAD);
2165 // Here we just try to handle vector loads/stores where our value type might
2166 // have pointer elements, which the SelectionDAG importer can't handle. To
2167 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
2168 // the value to use s64 types.
2169
2170 // Custom legalization requires the instruction, if not deleted, must be fully
2171 // legalized. In order to allow further legalization of the inst, we create
2172 // a new instruction and erase the existing one.
2173
2174 Register ValReg = MI.getOperand(0).getReg();
2175 const LLT ValTy = MRI.getType(ValReg);
2176
2177 if (ValTy == LLT::scalar(128)) {
2178
2179 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
2180 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2181 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
2182 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
     // The LDIAPPX/STILPX forms are only chosen for load-acquire /
     // store-release when the subtarget reports both LSE2 and RCPC3.
2183 bool IsRcpC3 =
2184 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2185
2186 LLT s64 = LLT::integer(64);
2187
2188 unsigned Opcode;
2189 if (IsRcpC3) {
2190 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2191 } else {
2192 // For LSE2, loads/stores should have been converted to monotonic and had
2193 // a fence inserted after them.
2194 assert(Ordering == AtomicOrdering::Monotonic ||
2195 Ordering == AtomicOrdering::Unordered);
2196 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
2197
2198 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2199 }
2200
2201 MachineInstrBuilder NewI;
2202 if (IsLoad) {
     // Load: two 64-bit results are produced and merged back into ValReg.
2203 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
2204 MIRBuilder.buildMergeLikeInstr(
2205 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
2206 } else {
     // Store: the 128-bit value is split into two 64-bit halves first.
2207 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
2208 NewI = MIRBuilder.buildInstr(
2209 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
2210 }
2211
     // Append the address operand(s): the RCPC3 forms take only the pointer
     // register, while the LDP/STP forms take a base register plus an
     // immediate.
2212 if (IsRcpC3) {
2213 NewI.addUse(MI.getOperand(1).getReg());
2214 } else {
2215 Register Base;
2216 int Offset;
2217 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
2218 NewI.addUse(Base);
     // The immediate is the matched offset divided by 8.
2219 NewI.addImm(Offset / 8);
2220 }
2221
2222 NewI.cloneMemRefs(MI);
2223 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
2224 *MRI.getTargetRegisterInfo(),
2225 *ST->getRegBankInfo());
2226 MI.eraseFromParent();
2227 return true;
2228 }
2229
     // Everything else must be a vector of address-space-0 pointers.
2230 if (!ValTy.isPointerVector() ||
2231 ValTy.getElementType().getAddressSpace() != 0) {
     // NOTE(review): this debug message has no trailing newline.
2232 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2233 return false;
2234 }
2235
2236 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2237 const LLT NewTy = LLT::vector(ValTy.getElementCount(), LLT::integer(PtrSize));
     // The existing memory operand is retyped in place and reused for the
     // replacement load/store.
2238 auto &MMO = **MI.memoperands_begin();
2239 MMO.setType(NewTy);
2240
2241 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2242 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
2243 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
2244 } else {
2245 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
2246 MIRBuilder.buildBitcast(ValReg, NewLoad);
2247 }
2248 MI.eraseFromParent();
2249 return true;
2250}
2251
/// Custom legalization that expands MI (operand 0 = result, operand 1 = pointer
/// to the argument list pointer, operand 2 = alignment immediate) into explicit
/// loads, pointer arithmetic and a store that advances the list pointer.
/// MI is erased; always returns true.
2252bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
     // NOTE(review): listing line 2253 is missing from this extract; the MRI
     // parameter used below is presumably declared there - confirm against the
     // upstream file.
2254 MachineIRBuilder &MIRBuilder) const {
2255 MachineFunction &MF = MIRBuilder.getMF();
2256 Align Alignment(MI.getOperand(2).getImm());
2257 Register Dst = MI.getOperand(0).getReg();
2258 Register ListPtr = MI.getOperand(1).getReg();
2259
2260 LLT PtrTy = MRI.getType(ListPtr);
2261 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2262
2263 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2264 const Align PtrAlign = Align(PtrSize);
     // Load the current argument pointer from the list.
2265 auto List = MIRBuilder.buildLoad(
2266 PtrTy, ListPtr,
2267 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2268 PtrTy, PtrAlign));
2269
2270 MachineInstrBuilder DstPtr;
2271 if (Alignment > PtrAlign) {
2272 // Realign the list to the actual required alignment.
2273 auto AlignMinus1 =
2274 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2275 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2276 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
2277 } else
2278 DstPtr = List;
2279
2280 LLT ValTy = MRI.getType(Dst);
2281 uint64_t ValSize = ValTy.getSizeInBits() / 8;
     // Load the argument value itself from the (possibly realigned) pointer.
2282 MIRBuilder.buildLoad(
2283 Dst, DstPtr,
2284 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2285 ValTy, std::max(Alignment, PtrAlign)));
2286
     // Advance past the value, rounding its size up to a multiple of the
     // pointer alignment.
2287 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2288
2289 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2290
     // Write the advanced pointer back to the list.
2291 MIRBuilder.buildStore(NewList, ListPtr,
2292 *MF.getMachineMemOperand(MachinePointerInfo(),
     // NOTE(review): listing line 2293 (the memory-operand flags argument) is
     // missing from this extract - confirm against the upstream file.
2294 PtrTy, PtrAlign));
2295
2296 MI.eraseFromParent();
2297 return true;
2298}
2299
2300bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2301 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2302 // Only legal if we can select immediate forms.
2303 // TODO: Lower this otherwise.
2304 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2305 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2306}
2307
/// Custom legalization of a population count whose source (operand 1) and
/// destination (operand 0) have the same type.
///
/// Paths taken, in order:
///  - CSSC available and 128-bit scalar: two 64-bit popcounts are added and
///    zero-extended into the destination.
///  - No NEON, or the function is NoImplicitFloat: the generic bit-count
///    lowering is attempted for 32/64-bit scalars only.
///  - Otherwise: the value is bitcast to a byte vector, popcounted per byte and
///    the per-byte counts are summed (UDOT when dot-product is available for
///    the supported vector shapes, uaddlv/uaddlp otherwise).
2308bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
     // NOTE(review): listing line 2309 is missing from this extract; the MRI
     // parameter used below is presumably declared there - confirm against the
     // upstream file.
2310 LegalizerHelper &Helper) const {
2311 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2312 // it can be more efficiently lowered to the following sequence that uses
2313 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2314 // registers are cheap.
2315 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2316 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2317 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2318 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2319 //
2320 // For 128 bit vector popcounts, we lower to the following sequence:
2321 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2322 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2323 // uaddlp.4s v0, v0 // v4s32, v2s64
2324 // uaddlp.2d v0, v0 // v2s64
2325 //
2326 // For 64 bit vector popcounts, we lower to the following sequence:
2327 // cnt.8b v0, v0 // v4s16, v2s32
2328 // uaddlp.4h v0, v0 // v4s16, v2s32
2329 // uaddlp.2s v0, v0 // v2s32
2330
2331 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2332 Register Dst = MI.getOperand(0).getReg();
2333 Register Val = MI.getOperand(1).getReg();
2334 LLT Ty = MRI.getType(Val);
2335
2336 LLT i64 = LLT::integer(64);
2337 LLT i32 = LLT::integer(32);
2338 LLT i16 = LLT::integer(16);
2339 LLT i8 = LLT::integer(8);
2340 unsigned Size = Ty.getSizeInBits();
2341
2342 assert(Ty == MRI.getType(Dst) &&
2343 "Expected src and dst to have the same type!");
2344
     // With CSSC, a 128-bit scalar is handled as two 64-bit popcounts whose sum
     // is zero-extended into the 128-bit destination.
2345 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2346
2347 auto Split = MIRBuilder.buildUnmerge(i64, Val);
2348 auto CTPOP1 = MIRBuilder.buildCTPOP(i64, Split->getOperand(0));
2349 auto CTPOP2 = MIRBuilder.buildCTPOP(i64, Split->getOperand(1));
2350 auto Add = MIRBuilder.buildAdd(i64, CTPOP1, CTPOP2);
2351
2352 MIRBuilder.buildZExt(Dst, Add);
2353 MI.eraseFromParent();
2354 return true;
2355 }
2356
2357 if (!ST->hasNEON() ||
2358 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2359 // Use generic lowering when custom lowering is not possible.
2360 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2361 Helper.lowerBitCount(MI) ==
     // NOTE(review): listing line 2362 (the right-hand side of this comparison)
     // is missing from this extract - confirm against the upstream file.
2363 }
2364
2365 // Pre-conditioning: widen Val up to the nearest vector type.
2366 // s32,s64,v4s16,v2s32 -> v8i8
2367 // v8s16,v4s32,v2s64 -> v16i8
2368 LLT VTy = Size == 128 ? LLT::fixed_vector(16, i8) : LLT::fixed_vector(8, i8);
2369 if (Ty.isScalar()) {
2370 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2371 if (Size == 32) {
2372 Val = MIRBuilder.buildZExt(i64, Val).getReg(0);
2373 }
2374 }
2375 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2376
2377 // Count bits in each byte-sized lane.
2378 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2379
2380 // Sum across lanes.
     // Dot-product path: a UDOT of the byte counts against a vector of ones,
     // accumulated onto zeros; not used for 16-bit element vectors.
2381 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2382 Ty.getScalarSizeInBits() != 16) {
     // For 2 x i64 the UDOT is done at 4 x i32 and then pairwise-widened.
2383 LLT Dt = Ty == LLT::fixed_vector(2, i64) ? LLT::fixed_vector(4, i32) : Ty;
2384 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2385 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2386 MachineInstrBuilder Sum;
2387
2388 if (Ty == LLT::fixed_vector(2, i64)) {
2389 auto UDOT =
2390 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2391 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2392 } else if (Ty == LLT::fixed_vector(4, i32)) {
2393 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2394 } else if (Ty == LLT::fixed_vector(2, i32)) {
2395 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2396 } else {
2397 llvm_unreachable("unexpected vector shape");
2398 }
2399
     // Retarget the final instruction so it defines Dst directly.
2400 Sum->getOperand(0).setReg(Dst);
2401 MI.eraseFromParent();
2402 return true;
2403 }
2404
     // Fallback: pick the widening-add intrinsic and the sequence of result
     // types to apply it at, one step per entry in HAddTys.
2405 Register HSum = CTPOP.getReg(0);
2406 unsigned Opc;
2407 SmallVector<LLT> HAddTys;
2408 if (Ty.isScalar()) {
2409 Opc = Intrinsic::aarch64_neon_uaddlv;
2410 HAddTys.push_back(i32);
2411 } else if (Ty == LLT::fixed_vector(8, i16)) {
2412 Opc = Intrinsic::aarch64_neon_uaddlp;
2413 HAddTys.push_back(LLT::fixed_vector(8, i16));
2414 } else if (Ty == LLT::fixed_vector(4, i32)) {
2415 Opc = Intrinsic::aarch64_neon_uaddlp;
2416 HAddTys.push_back(LLT::fixed_vector(8, i16));
2417 HAddTys.push_back(LLT::fixed_vector(4, i32));
2418 } else if (Ty == LLT::fixed_vector(2, i64)) {
2419 Opc = Intrinsic::aarch64_neon_uaddlp;
2420 HAddTys.push_back(LLT::fixed_vector(8, i16));
2421 HAddTys.push_back(LLT::fixed_vector(4, i32));
2422 HAddTys.push_back(LLT::fixed_vector(2, i64));
2423 } else if (Ty == LLT::fixed_vector(4, i16)) {
2424 Opc = Intrinsic::aarch64_neon_uaddlp;
2425 HAddTys.push_back(LLT::fixed_vector(4, i16));
2426 } else if (Ty == LLT::fixed_vector(2, i32)) {
2427 Opc = Intrinsic::aarch64_neon_uaddlp;
2428 HAddTys.push_back(LLT::fixed_vector(4, i16));
2429 HAddTys.push_back(LLT::fixed_vector(2, i32));
2430 } else
2431 llvm_unreachable("unexpected vector shape");
     // NOTE(review): listing line 2432 is missing from this extract; the
     // declaration of UADD used below is presumably there - confirm against the
     // upstream file.
2433 for (LLT HTy : HAddTys) {
2434 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2435 HSum = UADD.getReg(0);
2436 }
2437
2438 // Post-conditioning.
     // 64- and 128-bit scalars are wider than the i32 sum and need a zext;
     // otherwise the last instruction is retargeted to define Dst directly.
2439 if (Ty.isScalar() && (Size == 64 || Size == 128))
2440 MIRBuilder.buildZExt(Dst, UADD);
2441 else
2442 UADD->getOperand(0).setReg(Dst);
2443 MI.eraseFromParent();
2444 return true;
2445}
2446
/// Custom legalization of a 128-bit atomic compare-and-swap.
///
/// The desired (operand 2) and new (operand 3) values are split into 64-bit
/// halves. With LSE a CASP-family instruction is built on 128-bit register
/// sequences; without it a CMP_SWAP_128* pseudo taking the halves directly is
/// built. The two 64-bit results are merged into operand 0, MI is erased and
/// true is returned.
2447bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2448 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2449 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2450 LLT i64 = LLT::integer(64);
2451 auto Addr = MI.getOperand(1).getReg();
2452 auto DesiredI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(2));
2453 auto NewI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(3));
2454 auto DstLo = MRI.createGenericVirtualRegister(i64);
2455 auto DstHi = MRI.createGenericVirtualRegister(i64);
2456
2457 MachineInstrBuilder CAS;
2458 if (ST->hasLSE()) {
2459 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2460 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2461 // the rest of the MIR so we must reassemble the extracted registers into a
2462 // 128-bit known-regclass one with code like this:
2463 //
2464 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2465 // %out = CASP %in1, ...
2466 // %OldLo = G_EXTRACT %out, 0
2467 // %OldHi = G_EXTRACT %out, 64
2468 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2469 unsigned Opcode;
     // NOTE(review): the case labels of this switch (listing lines 2471, 2474
     // and 2477-2478) are missing from this extract; only the opcode
     // assignments survive. Which ordering maps to which opcode must be
     // confirmed against the upstream file.
2470 switch (Ordering) {
2472 Opcode = AArch64::CASPAX;
2473 break;
2475 Opcode = AArch64::CASPLX;
2476 break;
2479 Opcode = AArch64::CASPALX;
2480 break;
2481 default:
2482 Opcode = AArch64::CASPX;
2483 break;
2484 }
2485
2486 LLT s128 = LLT::scalar(128);
2487 auto CASDst = MRI.createGenericVirtualRegister(s128);
2488 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2489 auto CASNew = MRI.createGenericVirtualRegister(s128);
     // Pack each pair of 64-bit halves into one 128-bit register via the
     // sube64 / subo64 sub-register indices.
2490 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2491 .addUse(DesiredI->getOperand(0).getReg())
2492 .addImm(AArch64::sube64)
2493 .addUse(DesiredI->getOperand(1).getReg())
2494 .addImm(AArch64::subo64);
2495 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2496 .addUse(NewI->getOperand(0).getReg())
2497 .addImm(AArch64::sube64)
2498 .addUse(NewI->getOperand(1).getReg())
2499 .addImm(AArch64::subo64);
2500
2501 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2502
     // Pull the two 64-bit halves back out of the 128-bit result.
2503 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2504 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2505 } else {
2506 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2507 // can take arbitrary registers so it just has the normal GPR64 operands the
2508 // rest of AArch64 is expecting.
2509 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2510 unsigned Opcode;
     // NOTE(review): the case labels of this switch (listing lines 2512, 2515
     // and 2518-2519) are missing from this extract - confirm the
     // ordering-to-opcode mapping against the upstream file.
2511 switch (Ordering) {
2513 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2514 break;
2516 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2517 break;
2520 Opcode = AArch64::CMP_SWAP_128;
2521 break;
2522 default:
2523 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2524 break;
2525 }
2526
     // The pseudo defines a third, scratch GPR64 register besides the two
     // results.
2527 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2528 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2529 {Addr, DesiredI->getOperand(0),
2530 DesiredI->getOperand(1), NewI->getOperand(0),
2531 NewI->getOperand(1)});
2532 }
2533
2534 CAS.cloneMemRefs(MI);
2535 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2536 *MRI.getTargetRegisterInfo(),
2537 *ST->getRegBankInfo());
2538
     // Reassemble the 128-bit result for the original destination.
2539 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2540 MI.eraseFromParent();
2541 return true;
2542}
2543
2544bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2545 LegalizerHelper &Helper) const {
2546 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2547 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2548 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2549 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2550 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2551 MI.eraseFromParent();
2552 return true;
2553}
2554
2555bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2556 LegalizerHelper &Helper) const {
2557 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2558
2559 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2560 if (MI.getOpcode() == TargetOpcode::G_MEMSET ||
2561 MI.getOpcode() == TargetOpcode::G_MEMSET_INLINE) {
2562 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2563 // the instruction).
2564 auto &Value = MI.getOperand(1);
2565 Register ExtValueReg =
2566 MIRBuilder.buildAnyExt(LLT::integer(64), Value).getReg(0);
2567 Value.setReg(ExtValueReg);
2568 return true;
2569 }
2570
2571 return false;
2572}
2573
/// Custom legalization of a vector element extract.
///
/// The instruction is accepted unchanged (returns true) when VRegAndVal is set
/// or when the source vector is scalable; otherwise the result depends on
/// Helper.lowerExtractInsertVectorElt(MI).
2574bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2575 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2576 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2577 auto VRegAndVal =
     // NOTE(review): listing line 2578 (the initializer of VRegAndVal) is
     // missing from this extract; presumably a constant lookup on the index
     // operand - confirm against the upstream file.
2579 if (VRegAndVal)
2580 return true;
2581 LLT VecTy = MRI.getType(Element->getVectorReg());
2582 if (VecTy.isScalableVector())
2583 return true;
2584 return Helper.lowerExtractInsertVectorElt(MI) !=
     // NOTE(review): listing line 2585 (the right-hand side of this comparison)
     // is missing from this extract - confirm against the upstream file.
2586}
2587
/// Custom legalization of a dynamic stack allocation (operand 0 = result
/// pointer, operand 1 = allocation size, operand 2 = alignment immediate).
///
/// Unless the function has a "probe-stack" attribute whose value is
/// "inline-asm", the generic Helper.lowerDynStackAlloc lowering is used.
/// Otherwise a PROBED_STACKALLOC_DYN instruction is built on the computed
/// target pointer and that pointer is copied into the result. Always returns
/// true.
2588bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2589 MachineInstr &MI, LegalizerHelper &Helper) const {
2590 MachineFunction &MF = *MI.getParent()->getParent();
2591 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2592 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2593
2594 // If stack probing is not enabled for this function, use the default
2595 // lowering.
2596 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2597 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2598 "inline-asm") {
     // The result of the generic lowering is not inspected here.
2599 Helper.lowerDynStackAlloc(MI);
2600 return true;
2601 }
2602
2603 Register Dst = MI.getOperand(0).getReg();
2604 Register AllocSize = MI.getOperand(1).getReg();
2605 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2606
2607 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2608 "Unexpected type for dynamic alloca");
2609 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2610 "Unexpected type for dynamic alloca");
2611
2612 LLT PtrTy = MRI.getType(Dst);
2613 Register SPReg =
     // NOTE(review): listing line 2614 (the initializer of SPReg) is missing
     // from this extract - confirm against the upstream file.
2615 Register SPTmp =
2616 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2617 auto NewMI =
2618 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2619 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
     // Move the builder's insertion point to NewMI before emitting the copy
     // that defines Dst.
2620 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2621 MIRBuilder.buildCopy(Dst, SPTmp);
2622
2623 MI.eraseFromParent();
2624 return true;
2625}
2626
2627bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2628 LegalizerHelper &Helper) const {
2629 MachineIRBuilder &MIB = Helper.MIRBuilder;
2630 auto &AddrVal = MI.getOperand(0);
2631
2632 int64_t IsWrite = MI.getOperand(1).getImm();
2633 int64_t Locality = MI.getOperand(2).getImm();
2634 int64_t IsData = MI.getOperand(3).getImm();
2635
2636 bool IsStream = Locality == 0;
2637 if (Locality != 0) {
2638 assert(Locality <= 3 && "Prefetch locality out-of-range");
2639 // The locality degree is the opposite of the cache speed.
2640 // Put the number the other way around.
2641 // The encoding starts at 0 for level 1
2642 Locality = 3 - Locality;
2643 }
2644
2645 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2646
2647 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2648 MI.eraseFromParent();
2649 return true;
2650}
2651
/// Custom legalization of a vector concatenation whose element type is
/// narrower than 8 bits: every source is any-extended to a wider element size,
/// the wide vectors are concatenated and the result is truncated back into the
/// original destination. MI is erased; always returns true.
2652bool AArch64LegalizerInfo::legalizeConcatVectors(
     // NOTE(review): listing line 2653 is missing from this extract; MI and MRI
     // used below are presumably declared there - confirm against the upstream
     // file.
2654 MachineIRBuilder &MIRBuilder) const {
2655 // Widen sub-byte element vectors to byte-sized elements before concatenating.
2656 // This is analogous to SDAG's integer type promotion for sub-byte types.
     // NOTE(review): listing line 2657 is missing from this extract; the
     // definition of Concat used below is presumably there - confirm against
     // the upstream file.
2658 Register DstReg = Concat.getReg(0);
2659 LLT DstTy = MRI.getType(DstReg);
2660 assert(DstTy.getScalarSizeInBits() < 8 && "Expected dst ty to be < 8b");
2661
     // Wide element size: the element size rounded up to a power of two, but
     // never below 8 bits.
2662 unsigned WideEltSize =
2663 std::max(8u, (unsigned)PowerOf2Ceil(DstTy.getScalarSizeInBits()));
     // The type of source 0 is used for widening every source.
2664 LLT SrcTy = MRI.getType(Concat.getSourceReg(0));
2665 LLT WideSrcTy = SrcTy.changeElementSize(WideEltSize);
2666 LLT WideDstTy = DstTy.changeElementSize(WideEltSize);
2667
2668 SmallVector<Register> WideSrcs;
2669 for (unsigned I = 0; I < Concat.getNumSources(); ++I) {
2670 auto Wide = MIRBuilder.buildAnyExt(WideSrcTy, Concat.getSourceReg(I));
2671 WideSrcs.push_back(Wide.getReg(0));
2672 }
2673
2674 auto WideConcat = MIRBuilder.buildConcatVectors(WideDstTy, WideSrcs);
2675 MIRBuilder.buildTrunc(DstReg, WideConcat);
2676 MI.eraseFromParent();
2677 return true;
2678}
2679
/// Custom legalization of a floating-point truncation as a two-step sequence:
/// G_FPTRUNC_ODD down to 32-bit elements, then an ordinary truncation to the
/// destination element type.
///
/// The scalar f64 -> bf16 case is handled directly. Every other input is
/// asserted to be a fixed vector with a power-of-two element count, which is
/// processed in two-element pieces and reassembled. MI is erased; always
/// returns true.
2680bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2681 MachineIRBuilder &MIRBuilder,
2682 MachineRegisterInfo &MRI) const {
2683 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2684
2685 // This function legalizes f64 -> bf16 and f64 -> f16 truncations via f64 ->
2686 // f32 G_FPTRUNC_ODD and f32 -> [b]f16 G_FPTRUNC, which apparently avoids the
2687 // usual double-rounding issue that could be present from using twin
2688 // G_FPTRUNC.
2689
2690 if (DstTy.isBFloat16() && SrcTy.isFloat64()) {
2691 auto Mid =
2692 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {LLT::float32()}, {Src});
2693 MIRBuilder.buildInstr(AArch64::G_FPTRUNC, {Dst}, {Mid});
2694 MI.eraseFromParent();
2695 return true;
2696 }
2697
2698 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2699 "Expected a power of 2 elements");
2700
2701 // We must mutate types here as FPTrunc may be used on a IEEE floating point
2702 // or a brainfloat.
     // The names give the shape (element count x element size); the element
     // kind is inherited from DstTy / SrcTy.
2703 LLT v2s16 = DstTy.changeElementCount(2);
2704 LLT v4s16 = DstTy.changeElementCount(4);
2705 LLT v2s32 = SrcTy.changeElementCount(2).changeElementSize(32);
2706 LLT v4s32 = SrcTy.changeElementCount(4).changeElementSize(32);
2707 LLT v2s64 = SrcTy.changeElementCount(2);
2708
2709 SmallVector<Register> RegsToUnmergeTo;
2710 SmallVector<Register> TruncOddDstRegs;
2711 SmallVector<Register> RegsToMerge;
2712
2713 unsigned ElemCount = SrcTy.getNumElements();
2714
2715 // Find the biggest size chunks we can work with
2716 int StepSize = ElemCount % 4 ? 2 : 4;
2717
2718 // If we have a power of 2 greater than 2, we need to first unmerge into
2719 // enough pieces
2720 if (ElemCount <= 2)
2721 RegsToUnmergeTo.push_back(Src);
2722 else {
2723 for (unsigned i = 0; i < ElemCount / 2; ++i)
2724 RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));
2725
2726 MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);
2727 }
2728
2729 // Create all of the round-to-odd instructions and store them
2730 for (auto SrcReg : RegsToUnmergeTo) {
2731 Register Mid =
2732 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
2733 .getReg(0);
2734 TruncOddDstRegs.push_back(Mid);
2735 }
2736
2737 // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
2738 // truncate 2s32 to 2s16.
2739 unsigned Index = 0;
2740 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2741 if (StepSize == 4) {
2742 Register ConcatDst =
2743 MIRBuilder
     // NOTE(review): listing line 2744 (the builder call that combines the two
     // 2-element pieces into v4s32) is missing from this extract - confirm
     // against the upstream file.
2745 {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2746 .getReg(0);
2747
2748 RegsToMerge.push_back(
2749 MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));
2750 } else {
2751 RegsToMerge.push_back(
2752 MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));
2753 }
2754 }
2755
2756 // If there is only one register, replace the destination
2757 if (RegsToMerge.size() == 1) {
2758 MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());
2759 MI.eraseFromParent();
2760 return true;
2761 }
2762
2763 // Merge the rest of the instructions & replace the register
2764 Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);
2765 MRI.replaceRegWith(Dst, Fin);
2766 MI.eraseFromParent();
2767 return true;
2768}
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
This file declares the targeting of the Machinelegalizer class for AArch64.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Error unsupported(const char *Str, const Triple &T)
Definition MachO.cpp:77
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
Interface for Targets to specify which operations they can successfully select and how the others sho...
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ppc ctr loops verify
if(PassOpts->AAPipeline)
static constexpr MCPhysReg SPReg
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static constexpr int Concat[]
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1692
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_NE
not equal
Definition InstrTypes.h:762
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:758
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:723
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
static constexpr LLT float128()
Get a 128-bit IEEE quad value.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr ElementCount getElementCount() const
LLT divide(int Factor) const
Return a type that is Factor times smaller.
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
bool isFloat64() const
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
LLVM_ABI void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet & widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
LegalizeRuleSet & unsupported()
The instruction is unsupported.
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
LegalizeRuleSet & bitcastIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
The specified type index is coerced if predicate is true.
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & widenScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Widen the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & alwaysLegal()
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not: NegOne = G_CONSTANT -1; Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Dst = G_CTLZ Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Dst = G_CTPOP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT ~((1 << NumBits) - 1), i.e. clear the low NumBits bits of the pointer.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition Value.h:75
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
LLVM_ABI LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the ...
LLVM_ABI LegalityPredicate isPointerVector(unsigned TypeIdx)
True iff the specified type index is a vector of pointers (with any address space).
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LLVM_ABI LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LLVM_ABI LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering of Ordering or stronger.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's wider than the given size.
LLVM_ABI LegalityPredicate scalarNarrowerThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's narrower than the given size.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
LLVM_ABI LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LLVM_ABI LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx)
Keep the same scalar or element type as the given type index.
LLVM_ABI LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
LLVM_ABI LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromIndex.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:573
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:159
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1530
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
IntPtrTy
Definition InstrProf.h:82
DWARFExpression::Operation Op
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:436
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...