1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
42AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
43 : ST(&ST) {
44 using namespace TargetOpcode;
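 // Type shorthands used throughout this constructor: p0 is a 64-bit pointer
 // in address space 0, sN is an untyped N-bit scalar, iN/fN/bf16 are integer
 // and floating-point scalars, vMsN (and vMiN/vMfN) are fixed vectors of M
 // such elements, and nxvM... are scalable (SVE) vectors with a minimum of
 // M elements.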
45 const LLT p0 = LLT::pointer(0, 64);
46 const LLT s8 = LLT::scalar(8);
47 const LLT s16 = LLT::scalar(16);
48 const LLT s32 = LLT::scalar(32);
49 const LLT s64 = LLT::scalar(64);
50 const LLT s128 = LLT::scalar(128);
51 const LLT v16s8 = LLT::fixed_vector(16, 8);
52 const LLT v8s8 = LLT::fixed_vector(8, 8);
53 const LLT v4s8 = LLT::fixed_vector(4, 8);
54 const LLT v2s8 = LLT::fixed_vector(2, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
64 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
65 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
66 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
67
68 const LLT bf16 = LLT::bfloat16();
69 const LLT v4bf16 = LLT::fixed_vector(4, bf16);
70
71 const LLT f16 = LLT::float16();
72 const LLT v4f16 = LLT::fixed_vector(4, f16);
73 const LLT v8f16 = LLT::fixed_vector(8, f16);
74
75 const LLT f32 = LLT::float32();
76 const LLT v2f32 = LLT::fixed_vector(2, f32);
77 const LLT v4f32 = LLT::fixed_vector(4, f32);
78
79 const LLT f64 = LLT::float64();
80 const LLT v2f64 = LLT::fixed_vector(2, f64);
81
82 const LLT f128 = LLT::float128();
83
84 const LLT i8 = LLT::integer(8);
85 const LLT v8i8 = LLT::fixed_vector(8, i8);
86 const LLT v16i8 = LLT::fixed_vector(16, i8);
87
88 const LLT i16 = LLT::integer(16);
89 const LLT v8i16 = LLT::fixed_vector(8, i16);
90 const LLT v4i16 = LLT::fixed_vector(4, i16);
91
92 const LLT i32 = LLT::integer(32);
93 const LLT v2i32 = LLT::fixed_vector(2, i32);
94 const LLT v4i32 = LLT::fixed_vector(4, i32);
95
96 const LLT i64 = LLT::integer(64);
97 const LLT v2i64 = LLT::fixed_vector(2, i64);
98
99 const LLT i128 = LLT::integer(128);
100
101 const LLT nxv16i8 = LLT::scalable_vector(16, i8);
102 const LLT nxv8i16 = LLT::scalable_vector(8, i16);
103 const LLT nxv4i32 = LLT::scalable_vector(4, i32);
104 const LLT nxv2i64 = LLT::scalable_vector(2, i64);
105
106 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
107 v16s8, v8s16, v4s32,
108 v2s64, v2p0,
109 /* End 128bit types */
110 /* Begin 64bit types */
111 v8s8, v4s16, v2s32};
112 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
113 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
114 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
115
116 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
117
118 // FIXME: support subtargets which have neon/fp-armv8 disabled.
119 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
120 getLegacyLegalizerInfo().computeTables();
121 return;
122 }
123
124 // Some instructions only support s16 if the subtarget has full 16-bit FP
125 // support.
126 const bool HasFP16 = ST.hasFullFP16();
127 const LLT &MinFPScalar = HasFP16 ? f16 : f32;
128
129 const bool HasCSSC = ST.hasCSSC();
130 const bool HasRCPC3 = ST.hasRCPC3();
131 const bool HasSVE = ST.hasSVE();
132
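 // A rough legend for the rule combinators used below: legalFor() marks the
 // listed types (or type tuples) as legal, optionally gated on a subtarget
 // feature; clampScalar(Idx, Lo, Hi) widens/narrows scalar type index Idx
 // into the range [Lo, Hi]; clampNumElements/clampMaxNumElements resize
 // vector element counts; widenScalarToNextPow2 rounds scalar sizes up to a
 // power of two; scalarize() splits vectors into scalar operations; lower()
 // expands the operation generically; libcall() emits a runtime library
 // call; and custom()/customIf()/customFor() defer to legalizeCustom()
 // further down in this file.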
133 getActionDefinitionsBuilder(
134 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
135 .legalFor({p0, s8, s16, s32, s64})
136 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
137 v2s64, v2p0})
138 .widenScalarToNextPow2(0)
139 .clampScalar(0, s8, s64)
142 .clampNumElements(0, v8s8, v16s8)
143 .clampNumElements(0, v4s16, v8s16)
144 .clampNumElements(0, v2s32, v4s32)
145 .clampMaxNumElements(0, s64, 2)
146 .clampMaxNumElements(0, p0, 2)
148
149 getActionDefinitionsBuilder(G_PHI)
150 .legalFor({p0, s16, s32, s64})
151 .legalFor(PackedVectorAllTypeList)
155 .clampScalar(0, s16, s64)
156 .clampNumElements(0, v8s8, v16s8)
157 .clampNumElements(0, v4s16, v8s16)
158 .clampNumElements(0, v2s32, v4s32)
159 .clampMaxNumElements(0, s64, 2)
160 .clampMaxNumElements(0, p0, 2);
161
162 getActionDefinitionsBuilder(G_INSERT)
163 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
164 smallerThan(1, 0)))
165 .widenScalarToNextPow2(0)
166 .clampScalar(0, s32, s64)
168 .minScalar(1, s8)
169 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
170 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
171
172 getActionDefinitionsBuilder(G_EXTRACT)
173 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
174 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
175 .widenScalarToNextPow2(1)
176 .clampScalar(1, s32, s128)
178 .minScalar(0, s16)
179 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
180 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
181 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
182
183 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
184 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
185 .legalFor(HasSVE, {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
186 .widenScalarToNextPow2(0)
187 .clampScalar(0, s32, s64)
188 .clampMaxNumElements(0, s8, 16)
189 .clampMaxNumElements(0, s16, 8)
190 .clampNumElements(0, v2s32, v4s32)
191 .clampNumElements(0, v2s64, v2s64)
192 .minScalarOrEltIf(
193 [=](const LegalityQuery &Query) {
194 return Query.Types[0].getNumElements() <= 2;
195 },
196 0, s32)
197 .minScalarOrEltIf(
198 [=](const LegalityQuery &Query) {
199 return Query.Types[0].getNumElements() <= 4;
200 },
201 0, s16)
202 .minScalarOrEltIf(
203 [=](const LegalityQuery &Query) {
204 return Query.Types[0].getNumElements() <= 16;
205 },
206 0, s8)
207 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
209
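 // In the arithmetic rules above (and the G_MUL rules below), the
 // minScalarOrEltIf chains widen the element type of short vectors (<= 2
 // elements to at least s32, <= 4 to at least s16, <= 16 to at least s8),
 // roughly so that small vectors still fill a 64-bit NEON register rather
 // than being scalarized.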
210 getActionDefinitionsBuilder(G_MUL)
211 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
212 .widenScalarToNextPow2(0)
213 .clampScalar(0, s32, s64)
214 .clampMaxNumElements(0, s8, 16)
215 .clampMaxNumElements(0, s16, 8)
216 .clampNumElements(0, v2s32, v4s32)
217 .clampNumElements(0, v2s64, v2s64)
218 .minScalarOrEltIf(
219 [=](const LegalityQuery &Query) {
220 return Query.Types[0].getNumElements() <= 2;
221 },
222 0, s32)
223 .minScalarOrEltIf(
224 [=](const LegalityQuery &Query) {
225 return Query.Types[0].getNumElements() <= 4;
226 },
227 0, s16)
228 .minScalarOrEltIf(
229 [=](const LegalityQuery &Query) {
230 return Query.Types[0].getNumElements() <= 16;
231 },
232 0, s8)
233 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
235
236 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
237 .customIf([=](const LegalityQuery &Query) {
238 const auto &SrcTy = Query.Types[0];
239 const auto &AmtTy = Query.Types[1];
240 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
241 AmtTy.getSizeInBits() == 32;
242 })
243 .legalFor({
244 {i32, i32},
245 {i32, i64},
246 {i64, i64},
247 {v8i8, v8i8},
248 {v16i8, v16i8},
249 {v4i16, v4i16},
250 {v8i16, v8i16},
251 {v2i32, v2i32},
252 {v4i32, v4i32},
253 {v2i64, v2i64},
254 })
255 .widenScalarToNextPow2(0)
256 .clampScalar(1, s32, s64)
257 .clampScalar(0, s32, s64)
258 .clampNumElements(0, v8s8, v16s8)
259 .clampNumElements(0, v4s16, v8s16)
260 .clampNumElements(0, v2s32, v4s32)
261 .clampNumElements(0, v2s64, v2s64)
263 .minScalarSameAs(1, 0)
267
268 getActionDefinitionsBuilder(G_PTR_ADD)
269 .legalFor({{p0, i64}, {v2p0, v2i64}})
270 .clampScalarOrElt(1, s64, s64)
271 .clampNumElements(0, v2p0, v2p0);
272
273 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
274
275 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
276 .legalFor({i32, i64})
277 .libcallFor({i128})
278 .clampScalar(0, s32, s64)
280 .scalarize(0);
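 // 128-bit division has no AArch64 instruction; the i128 libcall typically
 // resolves to the compiler-rt/libgcc __divti3 / __udivti3 routines.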
281
282 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
283 .lowerFor({i8, i16, i32, i64, v2i32, v4i32, v2i64})
284 .libcallFor({i128})
286 .minScalarOrElt(0, s32)
287 .clampNumElements(0, v2s32, v4s32)
288 .clampNumElements(0, v2s64, v2s64)
289 .scalarize(0);
290
291 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
292 .widenScalarToNextPow2(0, /*Min = */ 32)
293 .clampScalar(0, s32, s64)
294 .lower();
295
296 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
297 .legalFor({i64, v16i8, v8i16, v4i32})
298 .lower();
299
300 getActionDefinitionsBuilder({G_SMULFIX, G_UMULFIX}).lower();
301
302 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
303 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
304 .legalFor(HasCSSC, {i32, i64})
305 .minScalar(HasCSSC, 0, s32)
306 .clampNumElements(0, v8s8, v16s8)
307 .clampNumElements(0, v4s16, v8s16)
308 .clampNumElements(0, v2s32, v4s32)
309 .lower();
310
311 // FIXME: Legal vector types are only legal with NEON.
312 getActionDefinitionsBuilder(G_ABS)
313 .legalFor(HasCSSC, {i32, i64})
314 .legalFor({v16i8, v8i16, v4i32, v2i64, v2p0, v8i8, v4i16, v2i32})
315 .customIf([=](const LegalityQuery &Q) {
316 // TODO: Fix suboptimal codegen for 128+ bit types.
317 LLT SrcTy = Q.Types[0];
318 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
319 })
320 .widenScalarIf(
321 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
322 [=](const LegalityQuery &Query) { return std::make_pair(0, v4i16); })
323 .widenScalarIf(
324 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
325 [=](const LegalityQuery &Query) { return std::make_pair(0, v2i32); })
326 .clampNumElements(0, v8s8, v16s8)
327 .clampNumElements(0, v4s16, v8s16)
328 .clampNumElements(0, v2s32, v4s32)
329 .clampNumElements(0, v2s64, v2s64)
331 .lower();
332
333 getActionDefinitionsBuilder(
334 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
335 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
336 .lower();
337
338 getActionDefinitionsBuilder(
339 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
340 .legalFor({{i32, i32}, {i64, i32}})
341 .clampScalar(0, s32, s64)
342 .clampScalar(1, s32, s64)
344
345 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
346 .customFor({{i32, i32}, {i32, i64}, {i64, i64}})
347 .lower();
348
349 getActionDefinitionsBuilder({G_ROTR, G_ROTL})
350 .legalFor({{i32, i64}, {i64, i64}})
351 .customIf([=](const LegalityQuery &Q) {
352 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
353 })
354 .lower();
356
357 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
358 .customFor({{s32, s32}, {s64, s64}});
359
360 auto always = [=](const LegalityQuery &Q) { return true; };
361 getActionDefinitionsBuilder(G_CTPOP)
362 .legalFor(HasCSSC, {{i32, i32}, {i64, i64}})
363 .legalFor({{v8i8, v8i8}, {v16i8, v16i8}})
364 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
365 .customFor({{s128, s128},
366 {v4s16, v4s16},
367 {v8s16, v8s16},
368 {v2s32, v2s32},
369 {v4s32, v4s32},
370 {v2s64, v2s64}})
371 .clampScalar(0, s32, s128)
374 .minScalarEltSameAsIf(always, 1, 0)
375 .maxScalarEltSameAsIf(always, 1, 0)
376 .clampNumElements(0, v8s8, v16s8)
377 .clampNumElements(0, v4s16, v8s16)
378 .clampNumElements(0, v2s32, v4s32)
379 .clampNumElements(0, v2s64, v2s64)
382
383 getActionDefinitionsBuilder({G_CTLZ, G_CTLS})
384 .legalFor({{i32, i32},
385 {i64, i64},
386 {v8i8, v8i8},
387 {v16i8, v16i8},
388 {v4i16, v4i16},
389 {v8i16, v8i16},
390 {v2i32, v2i32},
391 {v4i32, v4i32}})
392 .widenScalarToNextPow2(1, /*Min=*/32)
393 .clampScalar(1, s32, s64)
395 .clampNumElements(0, v8s8, v16s8)
396 .clampNumElements(0, v4s16, v8s16)
397 .clampNumElements(0, v2s32, v4s32)
400 .scalarSameSizeAs(0, 1);
401
402 getActionDefinitionsBuilder(G_CTLZ_ZERO_POISON).lower();
403
404 getActionDefinitionsBuilder(G_CTTZ)
405 .lowerIf(isVector(0))
406 .widenScalarToNextPow2(1, /*Min=*/32)
407 .clampScalar(1, s32, s64)
408 .scalarSameSizeAs(0, 1)
409 .legalFor(HasCSSC, {s32, s64})
410 .customFor(!HasCSSC, {s32, s64});
411
412 getActionDefinitionsBuilder(G_CTTZ_ZERO_POISON).lower();
413
414 getActionDefinitionsBuilder(G_BITREVERSE)
415 .legalFor({i32, i64, v8i8, v16i8})
416 .widenScalarToNextPow2(0, /*Min = */ 32)
418 .clampScalar(0, s32, s64)
419 .clampNumElements(0, v8s8, v16s8)
420 .clampNumElements(0, v4s16, v8s16)
421 .clampNumElements(0, v2s32, v4s32)
422 .clampNumElements(0, v2s64, v2s64)
425 .lower();
426
427 getActionDefinitionsBuilder(G_BSWAP)
428 .legalFor({i32, i64, v4i16, v8i16, v2i32, v4i32, v2i64})
430 .clampScalar(0, s32, s64)
431 .clampNumElements(0, v4s16, v8s16)
432 .clampNumElements(0, v2s32, v4s32)
433 .clampNumElements(0, v2s64, v2s64)
435
436 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
437 .legalFor({v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
438 .legalFor(HasSVE, {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
439 .clampNumElements(0, v8s8, v16s8)
440 .clampNumElements(0, v4s16, v8s16)
441 .clampNumElements(0, v2s32, v4s32)
442 .clampMaxNumElements(0, s64, 2)
445 .lower();
446
447 getActionDefinitionsBuilder(
448 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
449 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
450 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
451 .legalFor({f32, f64, v2f32, v4f32, v2f64})
452 .legalFor(HasFP16, {f16, v4f16, v8f16})
453 .libcallFor({f128})
454 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
456 [=](const LegalityQuery &Q) {
457 return (!HasFP16 && Q.Types[0].getScalarType().isFloat16()) ||
458 Q.Types[0].getScalarType().isBFloat16();
459 },
460 changeElementTo(0, f32))
461 .clampNumElements(0, v4s16, v8s16)
462 .clampNumElements(0, v2s32, v4s32)
463 .clampNumElements(0, v2s64, v2s64)
465
466 getActionDefinitionsBuilder({G_FABS, G_FNEG})
467 .legalFor({f32, f64, v2f32, v4f32, v2f64})
468 .legalFor(HasFP16, {f16, v4f16, v8f16})
469 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
471 .clampNumElements(0, v4s16, v8s16)
472 .clampNumElements(0, v2s32, v4s32)
473 .clampNumElements(0, v2s64, v2s64)
475 .lowerFor({f16, v4f16, v8f16});
476
477 getActionDefinitionsBuilder(G_FREM)
478 .libcallFor({f32, f64, f128})
479 .minScalar(0, f32)
480 .scalarize(0);
481
482 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
483 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
484 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
485 G_FSINH, G_FTANH, G_FMODF})
486 // We need a call for these, so we always need to scalarize.
487 .scalarize(0)
488 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
489 .minScalar(0, f32)
490 .libcallFor({f32, f64, f128});
491 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
492 .scalarize(0)
493 .minScalar(0, f32)
494 .libcallFor({{f32, i32}, {f64, i32}, {f128, i32}});
495
496 getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})
497 .legalFor({{i32, f32}, {i32, f64}, {i64, f32}, {i64, f64}})
498 .legalFor(HasFP16, {{i32, f16}, {i64, f16}})
499 .minScalar(1, s32)
500 .libcallFor({{s64, s128}})
501 .lower();
502 getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
503 .legalFor({{i64, f32}, {i64, f64}})
504 .legalFor(HasFP16, {{i64, f16}})
505 .minScalar(0, s64)
506 .minScalar(1, s32)
507 .libcallFor({{s64, s128}})
508 .lower();
509
510 // TODO: Custom legalization for mismatched types.
511 getActionDefinitionsBuilder(G_FCOPYSIGN)
512 .moreElementsIf(
513 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
514 [=](const LegalityQuery &Query) {
515 const LLT Ty = Query.Types[0];
516 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
517 })
518 .lower();
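 // Scalar G_FCOPYSIGN is first widened to a short vector so that the generic
 // lowering (masking and combining the sign bit) happens on the SIMD/FP
 // registers, which avoids bouncing values between GPRs and FPRs.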
519
521
522 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
523 auto &Actions = getActionDefinitionsBuilder(Op);
524
525 if (Op == G_SEXTLOAD)
527
528 // Atomics have zero extending behavior.
529 Actions
530 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
531 {s32, p0, s16, 8},
532 {s32, p0, s32, 8},
533 {s64, p0, s8, 2},
534 {s64, p0, s16, 2},
535 {s64, p0, s32, 4},
536 {s64, p0, s64, 8},
537 {p0, p0, s64, 8},
538 {v2s32, p0, s64, 8}})
539 .widenScalarToNextPow2(0)
540 .clampScalar(0, s32, s64)
541 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
542 // how to do that yet.
543 .unsupportedIfMemSizeNotPow2()
544 // Lower anything left over into G_*EXT and G_LOAD
545 .lower();
546 }
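 // Each legalForTypesWithMemDesc tuple reads {result type, pointer type,
 // in-memory type, minimum alignment}; entries whose result type is wider
 // than the memory type describe extending loads (e.g. {s32, p0, s8, 8} is
 // an s8 load extended to s32).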
547
548 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
549 const LLT &ValTy = Query.Types[0];
550 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
551 };
552
553 getActionDefinitionsBuilder(G_LOAD)
554 .customIf([=](const LegalityQuery &Query) {
555 return HasRCPC3 && Query.Types[0] == s128 &&
556 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
557 })
558 .customIf([=](const LegalityQuery &Query) {
559 return Query.Types[0] == s128 &&
560 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
561 })
562 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
563 {s16, p0, s16, 8},
564 {s32, p0, s32, 8},
565 {s64, p0, s64, 8},
566 {p0, p0, s64, 8},
567 {s128, p0, s128, 8},
568 {v8s8, p0, s64, 8},
569 {v16s8, p0, s128, 8},
570 {v4s16, p0, s64, 8},
571 {v8s16, p0, s128, 8},
572 {v2s32, p0, s64, 8},
573 {v4s32, p0, s128, 8},
574 {v2s64, p0, s128, 8}})
575 // These extends are also legal
576 .legalForTypesWithMemDesc(
577 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
578 .legalForTypesWithMemDesc({
579 // SVE vscale x 128 bit base sizes
580 {nxv16s8, p0, nxv16s8, 8},
581 {nxv8s16, p0, nxv8s16, 8},
582 {nxv4s32, p0, nxv4s32, 8},
583 {nxv2s64, p0, nxv2s64, 8},
584 })
585 .widenScalarToNextPow2(0, /* MinSize = */ 8)
586 .clampMaxNumElements(0, s8, 16)
587 .clampMaxNumElements(0, s16, 8)
588 .clampMaxNumElements(0, s32, 4)
589 .clampMaxNumElements(0, s64, 2)
590 .clampMaxNumElements(0, p0, 2)
592 .clampScalar(0, s8, s64)
593 .narrowScalarIf(
594 [=](const LegalityQuery &Query) {
595 // Clamp extending load results to 32-bits.
596 return Query.Types[0].isScalar() &&
597 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
598 Query.Types[0].getSizeInBits() > 32;
599 },
600 changeTo(0, s32))
601 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
602 .bitcastIf(typeInSet(0, {v4s8}),
603 [=](const LegalityQuery &Query) {
604 const LLT VecTy = Query.Types[0];
605 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
606 })
607 .customIf(IsPtrVecPred)
608 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
609 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
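 // Note: 128-bit atomic loads (and acquire loads when +rcpc3 is available)
 // and loads of pointer vectors are marked custom above and handled in
 // legalizeLoadStore() below.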
610
611 getActionDefinitionsBuilder(G_STORE)
612 .customIf([=](const LegalityQuery &Query) {
613 return HasRCPC3 && Query.Types[0] == s128 &&
614 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
615 })
616 .customIf([=](const LegalityQuery &Query) {
617 return Query.Types[0] == s128 &&
618 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
619 })
620 .widenScalarIf(
621 all(scalarNarrowerThan(0, 32),
623 changeTo(0, s32))
624 .legalForTypesWithMemDesc(
625 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
626 {s32, p0, s8, 8}, // truncstorei8 from s32
627 {s64, p0, s8, 8}, // truncstorei8 from s64
628 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
629 {s64, p0, s16, 8}, // truncstorei16 from s64
630 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
631 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
632 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
633 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
634 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
635 .legalForTypesWithMemDesc({
636 // SVE vscale x 128 bit base sizes
637 // TODO: Add nxv2p0. Consider bitcastIf.
638 // See #92130
639 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
640 {nxv16s8, p0, nxv16s8, 8},
641 {nxv8s16, p0, nxv8s16, 8},
642 {nxv4s32, p0, nxv4s32, 8},
643 {nxv2s64, p0, nxv2s64, 8},
644 })
645 .clampScalar(0, s8, s64)
646 .minScalarOrElt(0, s8)
647 .lowerIf([=](const LegalityQuery &Query) {
648 return Query.Types[0].isScalar() &&
649 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
650 })
651 // Maximum: sN * k = 128
652 .clampMaxNumElements(0, s8, 16)
653 .clampMaxNumElements(0, s16, 8)
654 .clampMaxNumElements(0, s32, 4)
655 .clampMaxNumElements(0, s64, 2)
656 .clampMaxNumElements(0, p0, 2)
658 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
659 .bitcastIf(all(typeInSet(0, {v4s8}),
660 LegalityPredicate([=](const LegalityQuery &Query) {
661 return Query.Types[0].getSizeInBits() ==
662 Query.MMODescrs[0].MemoryTy.getSizeInBits();
663 })),
664 [=](const LegalityQuery &Query) {
665 const LLT VecTy = Query.Types[0];
666 return std::pair(0, LLT::integer(VecTy.getSizeInBits()));
667 })
668 .customIf(IsPtrVecPred)
669 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
670 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
671 .lower();
672
673 getActionDefinitionsBuilder(G_INDEXED_STORE)
674 // Idx 0 == Ptr, Idx 1 == Val
675 // TODO: we can implement legalizations but as of now these are
676 // generated in a very specific way.
677 .legalForTypesWithMemDesc({
678 {p0, s8, s8, 8},
679 {p0, s16, s16, 8},
680 {p0, s32, s8, 8},
681 {p0, s32, s16, 8},
682 {p0, s32, s32, 8},
683 {p0, s64, s64, 8},
684 {p0, p0, p0, 8},
685 {p0, v8s8, v8s8, 8},
686 {p0, v16s8, v16s8, 8},
687 {p0, v4s16, v4s16, 8},
688 {p0, v8s16, v8s16, 8},
689 {p0, v2s32, v2s32, 8},
690 {p0, v4s32, v4s32, 8},
691 {p0, v2s64, v2s64, 8},
692 {p0, v2p0, v2p0, 8},
693 {p0, s128, s128, 8},
694 })
695 .unsupported();
696
697 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
698 LLT LdTy = Query.Types[0];
699 LLT PtrTy = Query.Types[1];
700 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
701 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
702 return false;
703 if (PtrTy != p0)
704 return false;
705 return true;
706 };
707 getActionDefinitionsBuilder(G_INDEXED_LOAD)
710 .legalIf(IndexedLoadBasicPred)
711 .unsupported();
712 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
713 .unsupportedIf(
715 .legalIf(all(typeInSet(0, {s16, s32, s64}),
716 LegalityPredicate([=](const LegalityQuery &Q) {
717 LLT LdTy = Q.Types[0];
718 LLT PtrTy = Q.Types[1];
719 LLT MemTy = Q.MMODescrs[0].MemoryTy;
720 if (PtrTy != p0)
721 return false;
722 if (LdTy == s16)
723 return MemTy == s8;
724 if (LdTy == s32)
725 return MemTy == s8 || MemTy == s16;
726 if (LdTy == s64)
727 return MemTy == s8 || MemTy == s16 || MemTy == s32;
728 return false;
729 })))
730 .unsupported();
731
732 // Constants
733 getActionDefinitionsBuilder(G_CONSTANT)
734 .legalFor({p0, s8, s16, s32, s64})
735 .widenScalarToNextPow2(0)
736 .clampScalar(0, s8, s64);
737 getActionDefinitionsBuilder(G_FCONSTANT)
738 .legalFor({s16, s32, s64, s128})
739 .clampScalar(0, MinFPScalar, s128);
740
741 // FIXME: fix moreElementsToNextPow2
742 getActionDefinitionsBuilder(G_ICMP)
743 .legalFor({{i32, i32}, {i32, i64}, {i32, p0}})
745 .minScalarOrElt(1, s8)
746 .clampScalar(1, s32, s64)
747 .clampScalar(0, s32, s32)
750 [=](const LegalityQuery &Query) {
751 const LLT &Ty = Query.Types[0];
752 const LLT &SrcTy = Query.Types[1];
753 return Ty.isVector() && !SrcTy.isPointerVector() &&
754 Ty.getElementType() != SrcTy.getElementType();
755 },
756 0, 1)
757 .minScalarOrEltIf(
758 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
759 1, s32)
760 .minScalarOrEltIf(
761 [=](const LegalityQuery &Query) {
762 return Query.Types[1].isPointerVector();
763 },
764 0, s64)
766 .clampNumElements(1, v8s8, v16s8)
767 .clampNumElements(1, v4s16, v8s16)
768 .clampNumElements(1, v2s32, v4s32)
769 .clampNumElements(1, v2s64, v2s64)
770 .clampNumElements(1, v2p0, v2p0)
771 .customIf(isVector(0));
772
773 getActionDefinitionsBuilder(G_FCMP)
774 .legalFor({{i32, f32},
775 {i32, f64},
776 {v4i32, v4f32},
777 {v2i32, v2f32},
778 {v2i64, v2f64}})
779 .legalFor(HasFP16, {{i32, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
781 .clampScalar(0, s32, s32)
783 [=](const LegalityQuery &Q) {
784 return (!HasFP16 && Q.Types[1].getScalarType().isFloat16()) ||
785 Q.Types[1].getScalarType().isBFloat16();
786 },
787 changeElementTo(1, f32))
788 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
790 [=](const LegalityQuery &Query) {
791 const LLT &Ty = Query.Types[0];
792 const LLT &SrcTy = Query.Types[1];
793 return Ty.isVector() && !SrcTy.isPointerVector() &&
794 Ty.getElementType() != SrcTy.getElementType();
795 },
796 0, 1)
797 .clampNumElements(1, v4s16, v8s16)
798 .clampNumElements(1, v2s32, v4s32)
799 .clampMaxNumElements(1, s64, 2)
801 .libcallFor({{s32, s128}});
802
803 // Extensions
804 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
805 unsigned DstSize = Query.Types[0].getSizeInBits();
806
807 // Handle legal vectors using legalFor
808 if (Query.Types[0].isVector())
809 return false;
810
811 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
812 return false; // Extending to a scalar s128 needs narrowing.
813
814 const LLT &SrcTy = Query.Types[1];
815
816 // Make sure we fit in a register otherwise. Don't bother checking that
817 // the source type is below 128 bits. We shouldn't be allowing anything
818 // through which is wider than the destination in the first place.
819 unsigned SrcSize = SrcTy.getSizeInBits();
820 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
821 return false;
822
823 return true;
824 };
825 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
826 .legalIf(ExtLegalFunc)
827 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
828 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
830 .clampMaxNumElements(1, s8, 8)
831 .clampMaxNumElements(1, s16, 4)
832 .clampMaxNumElements(1, s32, 2)
833 // Tries to convert a large EXTEND into two smaller EXTENDs
834 .lowerIf([=](const LegalityQuery &Query) {
835 return (Query.Types[0].getScalarSizeInBits() >
836 Query.Types[1].getScalarSizeInBits() * 2) &&
837 Query.Types[0].isVector() &&
838 (Query.Types[1].getScalarSizeInBits() == 8 ||
839 Query.Types[1].getScalarSizeInBits() == 16);
840 })
841 .clampMinNumElements(1, s8, 8)
842 .clampMinNumElements(1, s16, 4)
844
845 getActionDefinitionsBuilder(G_TRUNC)
846 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
848 .clampMaxNumElements(0, s8, 8)
849 .clampMaxNumElements(0, s16, 4)
850 .clampMaxNumElements(0, s32, 2)
852 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
853 0, s8)
854 .lowerIf([=](const LegalityQuery &Query) {
855 LLT DstTy = Query.Types[0];
856 LLT SrcTy = Query.Types[1];
857 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
858 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
859 })
860 .clampMinNumElements(0, s8, 8)
861 .clampMinNumElements(0, s16, 4)
862 .alwaysLegal();
863
864 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
865 .legalFor({{v8i8, v8i16}, {v4i16, v4i32}, {v2i32, v2i64}})
866 .clampNumElements(0, v2s32, v2s32);
867
868 getActionDefinitionsBuilder(G_SEXT_INREG)
869 .legalFor({i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
870 .maxScalar(0, s64)
871 .clampNumElements(0, v8s8, v16s8)
872 .clampNumElements(0, v4s16, v8s16)
873 .clampNumElements(0, v2s32, v4s32)
874 .clampMaxNumElements(0, s64, 2)
875 .lower();
876
877 // FP conversions
878 getActionDefinitionsBuilder(G_FPTRUNC)
879 .legalFor(
880 {{f16, f32}, {f16, f64}, {f32, f64}, {v4f16, v4f32}, {v2f32, v2f64}})
881 .legalFor(ST.hasBF16(), {{bf16, f32}, {v4bf16, v4f32}})
882 .libcallFor({{f16, f128}, {f32, f128}, {f64, f128}})
884 .customIf([](const LegalityQuery &Q) {
885 LLT DstTy = Q.Types[0];
886 LLT SrcTy = Q.Types[1];
887 return SrcTy.getScalarSizeInBits() == 64 &&
888 DstTy.getScalarSizeInBits() == 16;
889 })
890 .lowerFor({{bf16, f32}, {v4bf16, v4f32}})
891 // Clamp based on input
892 .clampNumElements(1, v4s32, v4s32)
893 .clampNumElements(1, v2s64, v2s64)
894 .scalarize(0);
895
896 getActionDefinitionsBuilder(G_FPEXT)
897 .legalFor({{f32, f16},
898 {f64, f16},
899 {f32, bf16},
900 {f64, f32},
901 {v4f32, v4f16},
902 {v4f32, v4bf16},
903 {v2f64, v2f32}})
904 .libcallFor({{f128, f64}, {f128, f32}, {f128, f16}})
907 [](const LegalityQuery &Q) {
908 LLT DstTy = Q.Types[0];
909 LLT SrcTy = Q.Types[1];
910 return SrcTy.isVector() && DstTy.isVector() &&
911 SrcTy.getScalarSizeInBits() == 16 &&
912 DstTy.getScalarSizeInBits() == 64;
913 },
914 changeElementTo(1, f32))
915 .clampNumElements(0, v4s32, v4s32)
916 .clampNumElements(0, v2s64, v2s64)
917 .scalarize(0);
918
919 // Conversions
920 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
921 .legalFor({{i32, f32},
922 {i64, f32},
923 {i32, f64},
924 {i64, f64},
925 {v2i32, v2f32},
926 {v4i32, v4f32},
927 {v2i64, v2f64}})
928 .legalFor(HasFP16,
929 {{i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
930 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
932 // The range of a fp16 value fits into an i17, so we can lower the width
933 // to i64.
935 [=](const LegalityQuery &Query) {
936 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
937 },
938 changeTo(0, i64))
941 .minScalar(0, s32)
942 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
944 [=](const LegalityQuery &Query) {
945 return Query.Types[0].getScalarSizeInBits() <= 64 &&
946 Query.Types[0].getScalarSizeInBits() >
947 Query.Types[1].getScalarSizeInBits();
948 },
950 .widenScalarIf(
951 [=](const LegalityQuery &Query) {
952 return Query.Types[1].getScalarSizeInBits() <= 64 &&
953 Query.Types[0].getScalarSizeInBits() <
954 Query.Types[1].getScalarSizeInBits();
955 },
957 .clampNumElements(0, v4s16, v8s16)
958 .clampNumElements(0, v2s32, v4s32)
959 .clampMaxNumElements(0, s64, 2)
960 .libcallFor(
961 {{i32, f128}, {i64, f128}, {i128, f128}, {i128, f32}, {i128, f64}});
962
963 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
964 .legalFor({{i32, f32},
965 {i64, f32},
966 {i32, f64},
967 {i64, f64},
968 {v2i32, v2f32},
969 {v4i32, v4f32},
970 {v2i64, v2f64}})
971 .legalFor(
972 HasFP16,
973 {{i16, f16}, {i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
974 // Handle types larger than i64 by scalarizing/lowering.
975 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
977 // The range of a fp16 value fits into an i17, so we can lower the width
978 // to i64.
980 [=](const LegalityQuery &Query) {
981 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
982 },
983 changeTo(0, i64))
984 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
986 .widenScalarToNextPow2(0, /*MinSize=*/32)
987 .minScalar(0, s32)
988 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
990 [=](const LegalityQuery &Query) {
991 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
992 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
993 ITySize > Query.Types[1].getScalarSizeInBits();
994 },
996 .widenScalarIf(
997 [=](const LegalityQuery &Query) {
998 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
999 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
1000 Query.Types[0].getScalarSizeInBits() < FTySize;
1001 },
1004 .clampNumElements(0, v4s16, v8s16)
1005 .clampNumElements(0, v2s32, v4s32)
1006 .clampMaxNumElements(0, s64, 2);
1007
1008 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
1009 .legalFor({{f32, i32},
1010 {f64, i32},
1011 {f32, i64},
1012 {f64, i64},
1013 {v2f32, v2i32},
1014 {v4f32, v4i32},
1015 {v2f64, v2i64}})
1016 .legalFor(HasFP16,
1017 {{f16, i32}, {f16, i64}, {v4f16, v4i16}, {v8f16, v8i16}})
1018 .unsupportedIf([&](const LegalityQuery &Query) {
1019 return Query.Types[0].getScalarType().isBFloat16();
1020 })
1021 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
1025 .minScalar(1, f32)
1026 .lowerIf([](const LegalityQuery &Query) {
1027 return Query.Types[1].isVector() &&
1028 Query.Types[1].getScalarSizeInBits() == 64 &&
1029 Query.Types[0].getScalarSizeInBits() == 16;
1030 })
1031 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
1032 .scalarizeIf(
1033 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
1034 [](const LegalityQuery &Query) {
1035 return Query.Types[0].getScalarSizeInBits() == 32 &&
1036 Query.Types[1].getScalarSizeInBits() == 64;
1037 },
1038 0)
1039 .widenScalarIf(
1040 [](const LegalityQuery &Query) {
1041 return Query.Types[1].getScalarSizeInBits() <= 64 &&
1042 Query.Types[0].getScalarSizeInBits() <
1043 Query.Types[1].getScalarSizeInBits();
1044 },
1046 .widenScalarIf(
1047 [](const LegalityQuery &Query) {
1048 return Query.Types[0].getScalarSizeInBits() <= 64 &&
1049 Query.Types[0].getScalarSizeInBits() >
1050 Query.Types[1].getScalarSizeInBits();
1051 },
1053 .clampNumElements(0, v4s16, v8s16)
1054 .clampNumElements(0, v2s32, v4s32)
1055 .clampMaxNumElements(0, s64, 2)
1056 .libcallFor({{f16, i128},
1057 {f32, i128},
1058 {f64, i128},
1059 {f128, i128},
1060 {f128, i32},
1061 {f128, i64}});
1062
1063 // Control-flow
1064 getActionDefinitionsBuilder(G_BR).alwaysLegal();
1065 getActionDefinitionsBuilder(G_BRCOND)
1066 .legalFor({s32})
1067 .clampScalar(0, s32, s32);
1068 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
1069
1070 getActionDefinitionsBuilder(G_SELECT)
1071 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1072 .widenScalarToNextPow2(0)
1073 .clampScalar(0, s32, s64)
1074 .clampScalar(1, s32, s32)
1077 .lowerIf(isVector(0));
1078
1079 // Pointer-handling
1080 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1081
1082 if (TM.getCodeModel() == CodeModel::Small)
1083 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1084 else
1085 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1086
1087 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1088 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1089
1090 getActionDefinitionsBuilder(G_PTRTOINT)
1091 .legalFor({{i64, p0}, {v2i64, v2p0}})
1092 .widenScalarToNextPow2(0, 64)
1093 .clampScalar(0, s64, s64)
1094 .clampMaxNumElements(0, s64, 2);
1095
1096 getActionDefinitionsBuilder(G_INTTOPTR)
1097 .unsupportedIf([&](const LegalityQuery &Query) {
1098 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1099 })
1100 .legalFor({{p0, i64}, {v2p0, v2i64}})
1101 .clampMaxNumElements(1, s64, 2);
1102
1103 // Casts for 32 and 64-bit width type are just copies.
1104 // Same for 128-bit width type, except they are on the FPR bank.
1105 getActionDefinitionsBuilder(G_BITCAST)
1107 // Keeping 32-bit instructions legal to prevent regression in some tests
1108 .legalForCartesianProduct({s32, v2s16, v4s8})
1109 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1110 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1111 .customIf([=](const LegalityQuery &Query) {
1112 // Handle casts from i1 vectors to scalars.
1113 LLT DstTy = Query.Types[0];
1114 LLT SrcTy = Query.Types[1];
1115 return DstTy.isScalar() && SrcTy.isVector() &&
1116 SrcTy.getScalarSizeInBits() == 1;
1117 })
1118 .lowerIf([=](const LegalityQuery &Query) {
1119 return Query.Types[0].isVector() != Query.Types[1].isVector();
1120 })
1122 .clampNumElements(0, v8s8, v16s8)
1123 .clampNumElements(0, v4s16, v8s16)
1124 .clampNumElements(0, v2s32, v4s32)
1125 .clampMaxNumElements(0, s64, 2)
1126 .lower();
1127
1128 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1129
1130 // va_list must be a pointer, but most sized types are pretty easy to handle
1131 // as the destination.
1132 getActionDefinitionsBuilder(G_VAARG)
1133 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1134 .clampScalar(0, s8, s64)
1135 .widenScalarToNextPow2(0, /*Min*/ 8);
1136
1137 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1138 .lowerIf(
1139 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1140
1141 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1142
1143 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1144 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1145 .customFor(!UseOutlineAtomics, {{s128, p0}})
1146 .libcallFor(UseOutlineAtomics,
1147 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1148 .clampScalar(0, s32, s64);
1149
1150 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1151 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1152 G_ATOMICRMW_XOR})
1153 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1154 .libcallFor(UseOutlineAtomics,
1155 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1156 .clampScalar(0, s32, s64);
1157
1158 // Do not outline these atomics operations, as per comment in
1159 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1160 getActionDefinitionsBuilder(
1161 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1162 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1163 .clampScalar(0, s32, s64);
1164
1165 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1166
1167 // Merge/Unmerge
1168 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1169 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1170 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1171 getActionDefinitionsBuilder(Op)
1172 .widenScalarToNextPow2(LitTyIdx, 8)
1173 .widenScalarToNextPow2(BigTyIdx, 32)
1174 .clampScalar(LitTyIdx, s8, s64)
1175 .clampScalar(BigTyIdx, s32, s128)
1176 .legalIf([=](const LegalityQuery &Q) {
1177 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1178 case 32:
1179 case 64:
1180 case 128:
1181 break;
1182 default:
1183 return false;
1184 }
1185 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1186 case 8:
1187 case 16:
1188 case 32:
1189 case 64:
1190 return true;
1191 default:
1192 return false;
1193 }
1194 });
1195 }
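 // In other words, merges/unmerges whose wide type is 32/64/128 bits and
 // whose narrow type is 8/16/32/64 bits are legal, e.g. unmerging an s128
 // into two s64 halves or merging four s8 values into an s32.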
1196
1197 // TODO : nxv4s16, nxv2s16, nxv2s32
1198 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1199 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1200 {s16, nxv8s16, s64},
1201 {s32, nxv4s32, s64},
1202 {s64, nxv2s64, s64}})
1203 .unsupportedIf([=](const LegalityQuery &Query) {
1204 const LLT &EltTy = Query.Types[1].getElementType();
1205 if (Query.Types[1].isScalableVector())
1206 return false;
1207 return Query.Types[0] != EltTy;
1208 })
1209 .minScalar(2, s64)
1210 .customIf([=](const LegalityQuery &Query) {
1211 const LLT &VecTy = Query.Types[1];
1212 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1213 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1214 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1215 })
1216 .minScalarOrEltIf(
1217 [=](const LegalityQuery &Query) {
1218 // We want to promote to <M x s1> to <M x s64> if that wouldn't
1219 // cause the total vec size to be > 128b.
1220 return Query.Types[1].isFixedVector() &&
1221 Query.Types[1].getNumElements() <= 2;
1222 },
1223 0, s64)
1224 .minScalarOrEltIf(
1225 [=](const LegalityQuery &Query) {
1226 return Query.Types[1].isFixedVector() &&
1227 Query.Types[1].getNumElements() <= 4;
1228 },
1229 0, s32)
1230 .minScalarOrEltIf(
1231 [=](const LegalityQuery &Query) {
1232 return Query.Types[1].isFixedVector() &&
1233 Query.Types[1].getNumElements() <= 8;
1234 },
1235 0, s16)
1236 .minScalarOrEltIf(
1237 [=](const LegalityQuery &Query) {
1238 return Query.Types[1].isFixedVector() &&
1239 Query.Types[1].getNumElements() <= 16;
1240 },
1241 0, s8)
1242 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1243 .moreElementsToNextPow2(1)
1244 .clampMaxNumElements(1, s64, 2)
1245 .clampMaxNumElements(1, s32, 4)
1246 .clampMaxNumElements(1, s16, 8)
1247 .clampMaxNumElements(1, s8, 16)
1248 .clampMaxNumElements(1, p0, 2)
1249 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1);
1250
1251 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1252 .legalIf(
1253 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1254 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1255 {nxv8s16, s32, s64},
1256 {nxv4s32, s32, s64},
1257 {nxv2s64, s64, s64}})
1259 .widenVectorEltsToVectorMinSize(0, 64)
1260 .clampNumElements(0, v8s8, v16s8)
1261 .clampNumElements(0, v4s16, v8s16)
1262 .clampNumElements(0, v2s32, v4s32)
1263 .clampMaxNumElements(0, s64, 2)
1264 .clampMaxNumElements(0, p0, 2)
1265 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
1266
1267 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1268 .legalFor({{v8s8, s8},
1269 {v16s8, s8},
1270 {v4s16, s16},
1271 {v8s16, s16},
1272 {v2s32, s32},
1273 {v4s32, s32},
1274 {v2s64, s64},
1275 {v2p0, p0}})
1276 .clampNumElements(0, v4s32, v4s32)
1277 .clampNumElements(0, v2s64, v2s64)
1278 .minScalarOrElt(0, s8)
1279 .widenVectorEltsToVectorMinSize(0, 64)
1280 .widenScalarOrEltToNextPow2(0)
1281 .minScalarSameAs(1, 0);
1282
1283 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1284
1285 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1286 .legalIf([=](const LegalityQuery &Query) {
1287 const LLT &DstTy = Query.Types[0];
1288 const LLT &SrcTy = Query.Types[1];
1289 // For now just support the TBL2 variant which needs the source vectors
1290 // to be the same size as the dest.
1291 if (DstTy != SrcTy)
1292 return false;
1293 return llvm::is_contained(
1294 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1295 })
1296 .moreElementsIf(
1297 [](const LegalityQuery &Query) {
1298 return Query.Types[0].getNumElements() >
1299 Query.Types[1].getNumElements();
1300 },
1301 changeTo(1, 0))
1303 .moreElementsIf(
1304 [](const LegalityQuery &Query) {
1305 return Query.Types[0].getNumElements() <
1306 Query.Types[1].getNumElements();
1307 },
1308 changeTo(0, 1))
1309 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1310 .clampNumElements(0, v8s8, v16s8)
1311 .clampNumElements(0, v4s16, v8s16)
1312 .clampNumElements(0, v4s32, v4s32)
1313 .clampNumElements(0, v2s64, v2s64)
1314 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
1315 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1316 // Bitcast pointers vector to i64.
1317 const LLT DstTy = Query.Types[0];
1318 return std::pair(
1319 0, LLT::vector(DstTy.getElementCount(), LLT::integer(64)));
1320 });
1321
1322 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1323 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1324 .customIf([=](const LegalityQuery &Query) {
1325 return Query.Types[0].isFixedVector() &&
1326 Query.Types[0].getScalarSizeInBits() < 8;
1327 })
1328 .bitcastIf(
1329 [=](const LegalityQuery &Query) {
1330 return Query.Types[0].isFixedVector() &&
1331 Query.Types[1].isFixedVector() &&
1332 Query.Types[0].getScalarSizeInBits() >= 8 &&
1333 isPowerOf2_64(Query.Types[0].getScalarSizeInBits()) &&
1334 Query.Types[0].getSizeInBits() <= 128 &&
1335 Query.Types[1].getSizeInBits() <= 64;
1336 },
1337 [=](const LegalityQuery &Query) {
1338 const LLT DstTy = Query.Types[0];
1339 const LLT SrcTy = Query.Types[1];
1340 return std::pair(
1341 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1342 .changeElementCount(
1343 DstTy.getElementCount().divideCoefficientBy(
1344 SrcTy.getNumElements())));
1345 });
1346
1347 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1348 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1350 .immIdx(0); // Inform verifier imm idx 0 is handled.
1351
1352 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1353 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1354 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1355
1356 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1357
1358 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1359
1360 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1361
1362 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1363
1364 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1365
1366 if (ST.hasMOPS()) {
1367 // G_BZERO is not supported. Currently it is only emitted by
1368 // PreLegalizerCombiner for G_MEMSET with zero constant.
1369 getActionDefinitionsBuilder(G_BZERO).unsupported();
1370
1371 getActionDefinitionsBuilder(G_MEMSET)
1372 .legalForCartesianProduct({p0}, {s64}, {s64})
1373 .customForCartesianProduct({p0}, {s8}, {s64})
1374 .immIdx(0); // Inform verifier imm idx 0 is handled.
1375
1376 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1377 .legalForCartesianProduct({p0}, {p0}, {s64})
1378 .immIdx(0); // Inform verifier imm idx 0 is handled.
1379
1380 // G_MEMCPY_INLINE does not have a tailcall immediate
1381 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1382 .legalForCartesianProduct({p0}, {p0}, {s64});
1383
1384 } else {
1385 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1386 .libcall();
1387 }
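 // With the MOPS extension (FEAT_MOPS) these memory operations can be
 // selected to the CPY*/SET* instruction sequences; without it they are
 // turned into the usual memcpy/memmove/memset libcalls.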
1388
1389 // For fadd reductions we have pairwise operations available. We treat the
1390 // usual legal types as legal and handle the lowering to pairwise instructions
1391 // later.
1392 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1393 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1394 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1395 .minScalarOrElt(0, MinFPScalar)
1396 .clampMaxNumElements(1, s64, 2)
1397 .clampMaxNumElements(1, s32, 4)
1398 .clampMaxNumElements(1, s16, 8)
1399 .moreElementsToNextPow2(1)
1400 .scalarize(1)
1401 .lower();
1402
1403 // For fmul reductions we need to split up into individual operations. We
1404 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1405 // smaller types, followed by scalarizing what remains.
1406 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1407 .minScalarOrElt(0, MinFPScalar)
1408 .clampMaxNumElements(1, s64, 2)
1409 .clampMaxNumElements(1, s32, 4)
1410 .clampMaxNumElements(1, s16, 8)
1411 .clampMaxNumElements(1, s32, 2)
1412 .clampMaxNumElements(1, s16, 4)
1413 .scalarize(1)
1414 .lower();
1415
1416 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1417 .scalarize(2)
1418 .lower();
1419
1420 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1421 .legalFor({{i8, v8i8},
1422 {i8, v16i8},
1423 {i16, v4i16},
1424 {i16, v8i16},
1425 {i32, v2i32},
1426 {i32, v4i32},
1427 {i64, v2i64}})
1429 .clampMaxNumElements(1, s64, 2)
1430 .clampMaxNumElements(1, s32, 4)
1431 .clampMaxNumElements(1, s16, 8)
1432 .clampMaxNumElements(1, s8, 16)
1433 .widenVectorEltsToVectorMinSize(1, 64)
1434 .scalarize(1);
1435
1436 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1437 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1438 .legalFor({{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1439 .legalFor(HasFP16, {{f16, v4f16}, {f16, v8f16}})
1440 .minScalarOrElt(0, MinFPScalar)
1441 .clampMaxNumElements(1, s64, 2)
1442 .clampMaxNumElements(1, s32, 4)
1443 .clampMaxNumElements(1, s16, 8)
1444 .scalarize(1)
1445 .lower();
1446
1447 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1448 .clampMaxNumElements(1, s32, 2)
1449 .clampMaxNumElements(1, s16, 4)
1450 .clampMaxNumElements(1, s8, 8)
1451 .scalarize(1)
1452 .lower();
1453
1454 getActionDefinitionsBuilder(
1455 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1456 .legalFor({{i8, v8i8},
1457 {i8, v16i8},
1458 {i16, v4i16},
1459 {i16, v8i16},
1460 {i32, v2i32},
1461 {i32, v4i32}})
1462 .moreElementsIf(
1463 [=](const LegalityQuery &Query) {
1464 return Query.Types[1].isVector() &&
1465 Query.Types[1].getElementType() != s8 &&
1466 Query.Types[1].getNumElements() & 1;
1467 },
1469 .clampMaxNumElements(1, s64, 2)
1470 .clampMaxNumElements(1, s32, 4)
1471 .clampMaxNumElements(1, s16, 8)
1472 .clampMaxNumElements(1, s8, 16)
1473 .scalarize(1)
1474 .lower();
1475
1476 getActionDefinitionsBuilder(
1477 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1478 // Try to break down into smaller vectors as long as they're at least 64
1479 // bits. This lets us use vector operations for some parts of the
1480 // reduction.
1481 .fewerElementsIf(
1482 [=](const LegalityQuery &Q) {
1483 LLT SrcTy = Q.Types[1];
1484 if (SrcTy.isScalar())
1485 return false;
1486 if (!isPowerOf2_32(SrcTy.getNumElements()))
1487 return false;
1488 // We can usually perform 64b vector operations.
1489 return SrcTy.getSizeInBits() > 64;
1490 },
1491 [=](const LegalityQuery &Q) {
1492 LLT SrcTy = Q.Types[1];
1493 return std::make_pair(1, SrcTy.divide(2));
1494 })
1495 .scalarize(1)
1496 .lower();
1497
1498 // TODO: Update this to correct handling when adding AArch64/SVE support.
1499 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1500
1501 // Access to floating-point environment.
1502 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1503 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1504 .libcall();
1505
1506 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1507
1508 getActionDefinitionsBuilder(G_PREFETCH).custom();
1509
1510 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1511
1512 getActionDefinitionsBuilder({G_INTRINSIC, G_INTRINSIC_W_SIDE_EFFECTS})
1513 .alwaysLegal();
1514 getActionDefinitionsBuilder(G_FENCE).alwaysLegal();
1515 getActionDefinitionsBuilder(G_INVOKE_REGION_START).alwaysLegal();
1516
1517 getLegacyLegalizerInfo().computeTables();
1518 verify(*ST.getInstrInfo());
1519}
1520
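// legalizeCustom() is the dispatcher for every rule above that was marked
// custom/customIf/customFor: it switches on the opcode and forwards to the
// per-opcode helpers that follow.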
1521bool AArch64LegalizerInfo::legalizeCustom(
1522 LegalizerHelper &Helper, MachineInstr &MI,
1523 LostDebugLocObserver &LocObserver) const {
1524 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1525 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1526 GISelChangeObserver &Observer = Helper.Observer;
1527 switch (MI.getOpcode()) {
1528 default:
1529 // No idea what to do.
1530 return false;
1531 case TargetOpcode::G_VAARG:
1532 return legalizeVaArg(MI, MRI, MIRBuilder);
1533 case TargetOpcode::G_LOAD:
1534 case TargetOpcode::G_STORE:
1535 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1536 case TargetOpcode::G_SHL:
1537 case TargetOpcode::G_ASHR:
1538 case TargetOpcode::G_LSHR:
1539 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1540 case TargetOpcode::G_GLOBAL_VALUE:
1541 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1542 case TargetOpcode::G_SBFX:
1543 case TargetOpcode::G_UBFX:
1544 return legalizeBitfieldExtract(MI, MRI, Helper);
1545 case TargetOpcode::G_FSHL:
1546 case TargetOpcode::G_FSHR:
1547 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1548 case TargetOpcode::G_ROTR:
1549 return legalizeRotate(MI, MRI, Helper);
1550 case TargetOpcode::G_CTPOP:
1551 return legalizeCTPOP(MI, MRI, Helper);
1552 case TargetOpcode::G_ATOMIC_CMPXCHG:
1553 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1554 case TargetOpcode::G_CTTZ:
1555 return legalizeCTTZ(MI, Helper);
1556 case TargetOpcode::G_BZERO:
1557 case TargetOpcode::G_MEMCPY:
1558 case TargetOpcode::G_MEMMOVE:
1559 case TargetOpcode::G_MEMSET:
1560 return legalizeMemOps(MI, Helper);
1561 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1562 return legalizeExtractVectorElt(MI, MRI, Helper);
1563 case TargetOpcode::G_DYN_STACKALLOC:
1564 return legalizeDynStackAlloc(MI, Helper);
1565 case TargetOpcode::G_PREFETCH:
1566 return legalizePrefetch(MI, Helper);
1567 case TargetOpcode::G_ABS:
1568 return Helper.lowerAbsToCNeg(MI);
1569 case TargetOpcode::G_ICMP:
1570 return legalizeICMP(MI, MRI, MIRBuilder);
1571 case TargetOpcode::G_BITCAST:
1572 return legalizeBitcast(MI, Helper);
1573 case TargetOpcode::G_CONCAT_VECTORS:
1574 return legalizeConcatVectors(MI, MRI, MIRBuilder);
1575 case TargetOpcode::G_FPTRUNC:
1576 // In order to lower an f64 to f16 truncation properly, we need to go
1577 // through f32 as an intermediate type.
1578 return legalizeFptrunc(MI, MIRBuilder, MRI);
1579 }
1580
1581 llvm_unreachable("expected switch to return");
1582}
1583
1584bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1585 LegalizerHelper &Helper) const {
1586 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1587 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1588 // Handle casts from i1 vectors to scalars by storing the vector to a
1589 // stack slot and reloading it as the scalar destination type.
1590 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1591 SrcTy.getElementType() != LLT::scalar(1))
1592 return false;
1593
1594 Helper.createStackStoreLoad(DstReg, SrcReg);
1595 MI.eraseFromParent();
1596 return true;
1597}
1598
1599bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1600 MachineRegisterInfo &MRI,
1601 MachineIRBuilder &MIRBuilder,
1602 GISelChangeObserver &Observer,
1603 LegalizerHelper &Helper) const {
1604 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1605 MI.getOpcode() == TargetOpcode::G_FSHR);
1606
1607 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1608 // lowering
1609 Register ShiftNo = MI.getOperand(3).getReg();
1610 LLT ShiftTy = MRI.getType(ShiftNo);
1611 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1612
1613 // Adjust shift amount according to Opcode (FSHL/FSHR)
1614 // Convert FSHL to FSHR
1615 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1616 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1617
1618 // Lower non-constant shifts and leave zero shifts to the optimizer.
1619 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1620 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1621 LegalizerHelper::LegalizeResult::Legalized);
1622
1623 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1624
1625 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1626
1627 // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
1628 // in the range of 0 <-> BitWidth, it is legal
1629 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1630 VRegAndVal->Value.ult(BitWidth))
1631 return true;
1632
1633 // Cast the ShiftNumber to a 64-bit type
1634 auto Cast64 = MIRBuilder.buildConstant(LLT::integer(64), Amount.zext(64));
1635
1636 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1637 Observer.changingInstr(MI);
1638 MI.getOperand(3).setReg(Cast64.getReg(0));
1639 Observer.changedInstr(MI);
1640 }
1641 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1642 // instruction
1643 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1644 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1645 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1646 Cast64.getReg(0)});
1647 MI.eraseFromParent();
1648 }
1649 return true;
1650}
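// For example, a 32-bit G_FSHL with a constant amount of 8 is rewritten
// above as a G_FSHR with amount 32 - 8 = 24; a constant-amount G_FSHR is
// what the AArch64 EXTR instruction pattern expects.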
1651
1652bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1653 MachineRegisterInfo &MRI,
1654 MachineIRBuilder &MIRBuilder) const {
1655 Register DstReg = MI.getOperand(0).getReg();
1656 Register SrcReg1 = MI.getOperand(2).getReg();
1657 Register SrcReg2 = MI.getOperand(3).getReg();
1658 LLT DstTy = MRI.getType(DstReg);
1659 LLT SrcTy = MRI.getType(SrcReg1);
1660
1661 // Check the vector types are legal
1662 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1663 DstTy.getNumElements() != SrcTy.getNumElements() ||
1664 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1665 return false;
1666
1667 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1668 // following passes
1669 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1670 if (Pred != CmpInst::ICMP_NE)
1671 return true;
1672 Register CmpReg =
1673 MIRBuilder
1674 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1675 .getReg(0);
1676 MIRBuilder.buildNot(DstReg, CmpReg);
1677
1678 MI.eraseFromParent();
1679 return true;
1680}
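// NEON only provides equality-style compares (CMEQ and friends), so a
// vector NE is expressed as EQ followed by a bitwise NOT.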
1681
1682bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1683 MachineRegisterInfo &MRI,
1684 LegalizerHelper &Helper) const {
1685 // To allow for imported patterns to match, we ensure that the rotate amount
1686 // is 64b with an extension.
1687 Register AmtReg = MI.getOperand(2).getReg();
1688 LLT AmtTy = MRI.getType(AmtReg);
1689 (void)AmtTy;
1690 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1691 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1692 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::integer(64), AmtReg);
1693 Helper.Observer.changingInstr(MI);
1694 MI.getOperand(2).setReg(NewAmt.getReg(0));
1695 Helper.Observer.changedInstr(MI);
1696 return true;
1697}
1698
1699bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1700 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1701 GISelChangeObserver &Observer) const {
1702 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1703 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1704 // G_ADD_LOW instructions.
1705 // By splitting this here, we can optimize accesses in the small code model by
1706 // folding in the G_ADD_LOW into the load/store offset.
1707 auto &GlobalOp = MI.getOperand(1);
1708 // Don't modify an intrinsic call.
1709 if (GlobalOp.isSymbol())
1710 return true;
1711 const auto *GV = GlobalOp.getGlobal();
1712 if (GV->isThreadLocal())
1713 return true; // Don't want to modify TLS vars.
1714
1715 auto &TM = ST->getTargetLowering()->getTargetMachine();
1716 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1717
1718 if (OpFlags & AArch64II::MO_GOT)
1719 return true;
1720
1721 auto Offset = GlobalOp.getOffset();
1722 Register DstReg = MI.getOperand(0).getReg();
1723 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1724 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1725 // Set the regclass on the dest reg too.
1726 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1727
1728 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1729 // by creating a MOVK that sets bits 48-63 of the register to (global address
1730 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1731 // prevent an incorrect tag being generated during relocation when the
1732 // global appears before the code section. Without the offset, a global at
1733 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1734 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1735 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1736 // instead of `0xf`.
1737 // This assumes that we're in the small code model so we can assume a binary
1738 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1739 // binary must also be loaded into address range [0, 2^48). Both of these
1740 // properties need to be ensured at runtime when using tagged addresses.
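  // With that bias, the example above becomes
  //   (0x0f00'0000'0000'1000 + 0x1'0000'0000 - 0x2000) >> 48 = 0x0f00,
  // so the MOVK writes 0x0f00 into bits 48-63 and the tag byte (bits 56-63)
  // is 0xf as intended.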
1741 if (OpFlags & AArch64II::MO_TAGGED) {
1742 assert(!Offset &&
1743 "Should not have folded in an offset for a tagged global!");
1744 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1745 .addGlobalAddress(GV, 0x100000000,
1746                                 AArch64II::MO_PREL | AArch64II::MO_G3)
1747               .addImm(48);
1748 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1749 }
1750
1751 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1752      .addGlobalAddress(GV, Offset,
1753                        OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1754 MI.eraseFromParent();
1755 return true;
1756}
1757
1758bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1759                                             MachineInstr &MI) const {
1760 MachineIRBuilder &MIB = Helper.MIRBuilder;
1761 MachineRegisterInfo &MRI = *MIB.getMRI();
1762
1763 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1764 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1765 MI.eraseFromParent();
1766 return true;
1767 };
1768 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1769 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1770 {MI.getOperand(2), MI.getOperand(3)});
1771 MI.eraseFromParent();
1772 return true;
1773 };
1774 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1775 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1776 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1777 MI.eraseFromParent();
1778 return true;
1779 };
1780
1781 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1782 switch (IntrinsicID) {
1783 case Intrinsic::vacopy: {
1784 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1785 unsigned VaListSize =
1786 (ST->isTargetDarwin() || ST->isTargetWindows())
1787 ? PtrSize
1788 : ST->isTargetILP32() ? 20 : 32;
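    // Darwin and Windows use a plain pointer for va_list (one copyable
    // pointer-sized slot), while the AAPCS64 va_list is the struct
    //   { void *__stack; void *__gr_top; void *__vr_top;
    //     int __gr_offs; int __vr_offs; }
    // i.e. 3*8 + 2*4 = 32 bytes (3*4 + 2*4 = 20 bytes under ILP32).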
1789
1790 MachineFunction &MF = *MI.getMF();
1791    auto Val = MF.getRegInfo().createGenericVirtualRegister(
1792        LLT::scalar(VaListSize * 8));
1793    MIB.buildLoad(Val, MI.getOperand(2),
1794                  *MF.getMachineMemOperand(MachinePointerInfo(),
1795                                           MachineMemOperand::MOLoad,
1796                                           VaListSize, Align(PtrSize)));
1797    MIB.buildStore(Val, MI.getOperand(1),
1798                   *MF.getMachineMemOperand(MachinePointerInfo(),
1799                                            MachineMemOperand::MOStore,
1800                                            VaListSize, Align(PtrSize)));
1801 MI.eraseFromParent();
1802 return true;
1803 }
1804 case Intrinsic::get_dynamic_area_offset: {
1805 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1806 MI.eraseFromParent();
1807 return true;
1808 }
1809 case Intrinsic::aarch64_mops_memset_tag: {
1810 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1811 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1812 // the instruction).
1813 auto &Value = MI.getOperand(3);
1814 Register ExtValueReg = MIB.buildAnyExt(LLT::integer(64), Value).getReg(0);
1815 Value.setReg(ExtValueReg);
1816 return true;
1817 }
1818 case Intrinsic::aarch64_prefetch: {
1819 auto &AddrVal = MI.getOperand(1);
1820
1821 int64_t IsWrite = MI.getOperand(2).getImm();
1822 int64_t Target = MI.getOperand(3).getImm();
1823 int64_t IsStream = MI.getOperand(4).getImm();
1824 int64_t IsData = MI.getOperand(5).getImm();
1825
1826 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1827 (!IsData << 3) | // IsDataCache bit
1828 (Target << 1) | // Cache level bits
1829 (unsigned)IsStream; // Stream bit
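    // E.g. IsWrite=1, Target=0 (L1), IsStream=0, IsData=1 gives
    // PrfOp = 0b10000 = 16, i.e. the PSTL1KEEP prefetch operation.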
1830
1831 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1832 MI.eraseFromParent();
1833 return true;
1834 }
1835 case Intrinsic::aarch64_range_prefetch: {
1836 auto &AddrVal = MI.getOperand(1);
1837
1838 int64_t IsWrite = MI.getOperand(2).getImm();
1839 int64_t IsStream = MI.getOperand(3).getImm();
1840 unsigned PrfOp = (IsStream << 2) | IsWrite;
1841
1842 MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
1843 .addImm(PrfOp)
1844 .add(AddrVal)
1845 .addUse(MI.getOperand(4).getReg()); // Metadata
1846 MI.eraseFromParent();
1847 return true;
1848 }
1849 case Intrinsic::aarch64_prefetch_ir: {
1850 auto &AddrVal = MI.getOperand(1);
1851 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(24).add(AddrVal);
1852 MI.eraseFromParent();
1853 return true;
1854 }
1855 case Intrinsic::aarch64_neon_uaddv:
1856 case Intrinsic::aarch64_neon_saddv:
1857 case Intrinsic::aarch64_neon_umaxv:
1858 case Intrinsic::aarch64_neon_smaxv:
1859 case Intrinsic::aarch64_neon_uminv:
1860 case Intrinsic::aarch64_neon_sminv: {
1861 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1862 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1863 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1864
1865 auto OldDst = MI.getOperand(0).getReg();
1866 auto OldDstTy = MRI.getType(OldDst);
1867 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1868 if (OldDstTy == NewDstTy)
1869 return true;
1870
1871 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1872
1873 Helper.Observer.changingInstr(MI);
1874 MI.getOperand(0).setReg(NewDst);
1875 Helper.Observer.changedInstr(MI);
1876
1877 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1878 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1879 OldDst, NewDst);
1880
1881 return true;
1882 }
1883 case Intrinsic::aarch64_neon_uaddlp:
1884 case Intrinsic::aarch64_neon_saddlp: {
1885 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1886 ? AArch64::G_UADDLP
1887 : AArch64::G_SADDLP;
1888 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1889 MI.eraseFromParent();
1890
1891 return true;
1892 }
1893 case Intrinsic::aarch64_neon_uaddlv:
1894 case Intrinsic::aarch64_neon_saddlv: {
1895 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1896 ? AArch64::G_UADDLV
1897 : AArch64::G_SADDLV;
1898 Register DstReg = MI.getOperand(0).getReg();
1899 Register SrcReg = MI.getOperand(2).getReg();
1900 LLT DstTy = MRI.getType(DstReg);
1901
1902 LLT MidTy, ExtTy;
1903 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1904 ExtTy = LLT::integer(32);
1905 MidTy = LLT::fixed_vector(4, ExtTy);
1906 } else {
1907 ExtTy = LLT::integer(64);
1908 MidTy = LLT::fixed_vector(2, ExtTy);
1909 }
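    // E.g. for an i32 aarch64.neon.uaddlv of a v8s8, G_UADDLV produces a v4s32
    // whose lane 0 holds the widened sum; we read lane 0 back out and copy (or
    // truncate, for i8/i16 results) into the original destination below.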
1910
1911 Register MidReg =
1912 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1913 Register ZeroReg =
1914 MIB.buildConstant(LLT::integer(64), 0)->getOperand(0).getReg();
1915 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1916 {MidReg, ZeroReg})
1917 .getReg(0);
1918
1919 if (DstTy.getScalarSizeInBits() < 32)
1920 MIB.buildTrunc(DstReg, ExtReg);
1921 else
1922 MIB.buildCopy(DstReg, ExtReg);
1923
1924 MI.eraseFromParent();
1925
1926 return true;
1927 }
1928 case Intrinsic::aarch64_neon_smax:
1929 return LowerBinOp(TargetOpcode::G_SMAX);
1930 case Intrinsic::aarch64_neon_smin:
1931 return LowerBinOp(TargetOpcode::G_SMIN);
1932 case Intrinsic::aarch64_neon_umax:
1933 return LowerBinOp(TargetOpcode::G_UMAX);
1934 case Intrinsic::aarch64_neon_umin:
1935 return LowerBinOp(TargetOpcode::G_UMIN);
1936 case Intrinsic::aarch64_neon_fmax:
1937 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1938 case Intrinsic::aarch64_neon_fmin:
1939 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1940 case Intrinsic::aarch64_neon_fmaxnm:
1941 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1942 case Intrinsic::aarch64_neon_fminnm:
1943 return LowerBinOp(TargetOpcode::G_FMINNUM);
1944 case Intrinsic::aarch64_neon_pmull:
1945 case Intrinsic::aarch64_neon_pmull64:
1946 return LowerBinOp(AArch64::G_PMULL);
1947 case Intrinsic::aarch64_neon_smull:
1948 return LowerBinOp(AArch64::G_SMULL);
1949 case Intrinsic::aarch64_neon_umull:
1950 return LowerBinOp(AArch64::G_UMULL);
1951 case Intrinsic::aarch64_neon_sabd:
1952 return LowerBinOp(TargetOpcode::G_ABDS);
1953 case Intrinsic::aarch64_neon_uabd:
1954 return LowerBinOp(TargetOpcode::G_ABDU);
1955 case Intrinsic::aarch64_neon_uhadd:
1956 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1957 case Intrinsic::aarch64_neon_urhadd:
1958 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1959 case Intrinsic::aarch64_neon_shadd:
1960 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1961 case Intrinsic::aarch64_neon_srhadd:
1962 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1963 case Intrinsic::aarch64_neon_sqshrn: {
1964 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1965 return true;
1966 // Create right shift instruction. Store the output register in Shr.
1967 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1968 {MRI.getType(MI.getOperand(2).getReg())},
1969 {MI.getOperand(2), MI.getOperand(3).getImm()});
1970 // Build the narrow intrinsic, taking in Shr.
1971 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1972 MI.eraseFromParent();
1973 return true;
1974 }
1975 case Intrinsic::aarch64_neon_sqshrun: {
1976 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1977 return true;
1978 // Create right shift instruction. Store the output register in Shr.
1979 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1980 {MRI.getType(MI.getOperand(2).getReg())},
1981 {MI.getOperand(2), MI.getOperand(3).getImm()});
1982 // Build the narrow intrinsic, taking in Shr.
1983 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1984 MI.eraseFromParent();
1985 return true;
1986 }
1987 case Intrinsic::aarch64_neon_sqrshrn: {
1988 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1989 return true;
1990 // Create right shift instruction. Store the output register in Shr.
1991 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1992 {MRI.getType(MI.getOperand(2).getReg())},
1993 {MI.getOperand(2), MI.getOperand(3).getImm()});
1994 // Build the narrow intrinsic, taking in Shr.
1995 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1996 MI.eraseFromParent();
1997 return true;
1998 }
1999 case Intrinsic::aarch64_neon_sqrshrun: {
2000 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2001 return true;
2002 // Create right shift instruction. Store the output register in Shr.
2003 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
2004 {MRI.getType(MI.getOperand(2).getReg())},
2005 {MI.getOperand(2), MI.getOperand(3).getImm()});
2006 // Build the narrow intrinsic, taking in Shr.
2007 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
2008 MI.eraseFromParent();
2009 return true;
2010 }
2011 case Intrinsic::aarch64_neon_uqrshrn: {
2012 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2013 return true;
2014 // Create right shift instruction. Store the output register in Shr.
2015 auto Shr = MIB.buildInstr(AArch64::G_URSHR_I,
2016 {MRI.getType(MI.getOperand(2).getReg())},
2017 {MI.getOperand(2), MI.getOperand(3).getImm()});
2018 // Build the narrow intrinsic, taking in Shr.
2019 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2020 MI.eraseFromParent();
2021 return true;
2022 }
2023 case Intrinsic::aarch64_neon_uqshrn: {
2024 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
2025 return true;
2026 // Create right shift instruction. Store the output register in Shr.
2027 auto Shr = MIB.buildInstr(AArch64::G_VLSHR,
2028 {MRI.getType(MI.getOperand(2).getReg())},
2029 {MI.getOperand(2), MI.getOperand(3).getImm()});
2030 // Build the narrow intrinsic, taking in Shr.
2031 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
2032 MI.eraseFromParent();
2033 return true;
2034 }
2035 case Intrinsic::aarch64_neon_sqshlu: {
2036 // Check if last operand is constant vector dup
2037 auto ShiftAmount = isConstantOrConstantSplatVector(
2038 *MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);
2039 if (ShiftAmount) {
2040 // If so, create a new intrinsic with the correct shift amount
2041 MIB.buildInstr(AArch64::G_SQSHLU_I, {MI.getOperand(0)},
2042 {MI.getOperand(2)})
2043 .addImm(ShiftAmount->getSExtValue());
2044 MI.eraseFromParent();
2045 return true;
2046 }
2047 return false;
2048 }
2049 case Intrinsic::aarch64_neon_vsli: {
2050 MIB.buildInstr(
2051 AArch64::G_SLI, {MI.getOperand(0)},
2052 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2053 MI.eraseFromParent();
2054 break;
2055 }
2056 case Intrinsic::aarch64_neon_vsri: {
2057 MIB.buildInstr(
2058 AArch64::G_SRI, {MI.getOperand(0)},
2059 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
2060 MI.eraseFromParent();
2061 break;
2062 }
2063 case Intrinsic::aarch64_neon_abs: {
2064 // Lower the intrinsic to G_ABS.
2065 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
2066 MI.eraseFromParent();
2067 return true;
2068 }
2069 case Intrinsic::aarch64_neon_sqadd: {
2070 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2071 return LowerBinOp(TargetOpcode::G_SADDSAT);
2072 break;
2073 }
2074 case Intrinsic::aarch64_neon_sqsub: {
2075 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2076 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2077 break;
2078 }
2079 case Intrinsic::aarch64_neon_uqadd: {
2080 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2081 return LowerBinOp(TargetOpcode::G_UADDSAT);
2082 break;
2083 }
2084 case Intrinsic::aarch64_neon_uqsub: {
2085 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2086 return LowerBinOp(TargetOpcode::G_USUBSAT);
2087 break;
2088 }
2089 case Intrinsic::aarch64_neon_udot:
2090 return LowerTriOp(AArch64::G_UDOT);
2091 case Intrinsic::aarch64_neon_sdot:
2092 return LowerTriOp(AArch64::G_SDOT);
2093 case Intrinsic::aarch64_neon_usdot:
2094 return LowerTriOp(AArch64::G_USDOT);
2095 case Intrinsic::aarch64_neon_sqxtn:
2096 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2097 case Intrinsic::aarch64_neon_sqxtun:
2098 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2099 case Intrinsic::aarch64_neon_uqxtn:
2100 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2101 case Intrinsic::aarch64_neon_fcvtzu:
2102 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2103 case Intrinsic::aarch64_neon_fcvtzs:
2104 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2105
2106 case Intrinsic::vector_reverse:
2107 // TODO: Add support for vector_reverse
2108 return false;
2109 }
2110
2111 return true;
2112}
2113
2114bool AArch64LegalizerInfo::legalizeShlAshrLshr(
2115    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2116    GISelChangeObserver &Observer) const {
2117 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
2118 MI.getOpcode() == TargetOpcode::G_LSHR ||
2119 MI.getOpcode() == TargetOpcode::G_SHL);
2120 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
2121 // imported patterns can select it later. Either way, it will be legal.
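  // Illustrative (generic MIR): %r:_(s32) = G_SHL %x, %amt(s32) with
  // %amt = G_CONSTANT i32 3 is rewritten to use a fresh
  // %amt64:_(s64) = G_CONSTANT i64 3 as the shift-amount operand.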
2122 Register AmtReg = MI.getOperand(2).getReg();
2123 LLT AmtRegEltTy = MRI.getType(AmtReg).getScalarType();
2124 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
2125 if (!VRegAndVal)
2126 return true;
2127 // Check the shift amount is in range for an immediate form.
2128 int64_t Amount = VRegAndVal->Value.getSExtValue();
2129 if (Amount > 31)
2130 return true; // This will have to remain a register variant.
2131 auto ExtCst =
2132 MIRBuilder.buildConstant(AmtRegEltTy.changeElementSize(64), Amount);
2133 Observer.changingInstr(MI);
2134 MI.getOperand(2).setReg(ExtCst.getReg(0));
2135 Observer.changedInstr(MI);
2136 return true;
2137}
2138
2139static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
2140                                MachineRegisterInfo &MRI) {
2141 Base = Root;
2142 Offset = 0;
2143
2144 Register NewBase;
2145 int64_t NewOffset;
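  // isShiftedInt<7, 3> accepts signed multiples of 8 in [-512, 504], exactly
  // the scaled imm7 offsets that 64-bit LDP/STP addressing can encode.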
2146 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
2147 isShiftedInt<7, 3>(NewOffset)) {
2148 Base = NewBase;
2149 Offset = NewOffset;
2150 }
2151}
2152
2153// FIXME: This should be removed and replaced with the generic bitcast legalize
2154// action.
2155bool AArch64LegalizerInfo::legalizeLoadStore(
2156    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2157    GISelChangeObserver &Observer) const {
2158 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
2159 MI.getOpcode() == TargetOpcode::G_LOAD);
2160 // Here we just try to handle vector loads/stores where our value type might
2161 // have pointer elements, which the SelectionDAG importer can't handle. To
2162 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
2163 // the value to use s64 types.
2164
2165  // Custom legalization requires that the instruction, if not deleted, be fully
2166  // legalized. To allow further legalization of the instruction, we create a new
2167  // one and erase the existing one.
2168
2169 Register ValReg = MI.getOperand(0).getReg();
2170 const LLT ValTy = MRI.getType(ValReg);
2171
2172 if (ValTy == LLT::scalar(128)) {
2173
2174 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
2175 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2176 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
2177 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
2178 bool IsRcpC3 =
2179 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2180
2181 LLT s64 = LLT::integer(64);
2182
2183 unsigned Opcode;
2184 if (IsRcpC3) {
2185 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2186 } else {
2187 // For LSE2, loads/stores should have been converted to monotonic and had
2188 // a fence inserted after them.
2189 assert(Ordering == AtomicOrdering::Monotonic ||
2190 Ordering == AtomicOrdering::Unordered);
2191 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
2192
2193 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2194 }
2195
2196 MachineInstrBuilder NewI;
2197 if (IsLoad) {
2198 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
2199 MIRBuilder.buildMergeLikeInstr(
2200 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
2201 } else {
2202 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
2203 NewI = MIRBuilder.buildInstr(
2204 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
2205 }
2206
2207 if (IsRcpC3) {
2208 NewI.addUse(MI.getOperand(1).getReg());
2209 } else {
2210 Register Base;
2211 int Offset;
2212 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
2213 NewI.addUse(Base);
2214 NewI.addImm(Offset / 8);
2215 }
2216
2217 NewI.cloneMemRefs(MI);
2218 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
2219 *MRI.getTargetRegisterInfo(),
2220 *ST->getRegBankInfo());
2221 MI.eraseFromParent();
2222 return true;
2223 }
2224
2225 if (!ValTy.isPointerVector() ||
2226 ValTy.getElementType().getAddressSpace() != 0) {
2227 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2228 return false;
2229 }
2230
2231 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2232 const LLT NewTy = LLT::vector(ValTy.getElementCount(), LLT::integer(PtrSize));
2233 auto &MMO = **MI.memoperands_begin();
2234 MMO.setType(NewTy);
2235
2236 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2237 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
2238 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
2239 } else {
2240 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
2241 MIRBuilder.buildBitcast(ValReg, NewLoad);
2242 }
2243 MI.eraseFromParent();
2244 return true;
2245}
2246
2247bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2248                                         MachineRegisterInfo &MRI,
2249                                         MachineIRBuilder &MIRBuilder) const {
2250 MachineFunction &MF = MIRBuilder.getMF();
2251 Align Alignment(MI.getOperand(2).getImm());
2252 Register Dst = MI.getOperand(0).getReg();
2253 Register ListPtr = MI.getOperand(1).getReg();
2254
2255 LLT PtrTy = MRI.getType(ListPtr);
2256 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2257
2258 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2259 const Align PtrAlign = Align(PtrSize);
2260 auto List = MIRBuilder.buildLoad(
2261 PtrTy, ListPtr,
2262 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2263 PtrTy, PtrAlign));
2264
2265 MachineInstrBuilder DstPtr;
2266 if (Alignment > PtrAlign) {
2267 // Realign the list to the actual required alignment.
2268 auto AlignMinus1 =
2269 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2270 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2271 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
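    // E.g. for Alignment == 16 this rounds List up to the next 16-byte
    // boundary: the G_PTR_ADD adds Alignment - 1 and the G_PTRMASK clears the
    // low Log2(16) = 4 bits.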
2272 } else
2273 DstPtr = List;
2274
2275 LLT ValTy = MRI.getType(Dst);
2276 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2277 MIRBuilder.buildLoad(
2278 Dst, DstPtr,
2279 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2280 ValTy, std::max(Alignment, PtrAlign)));
2281
2282 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2283
2284 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2285
2286 MIRBuilder.buildStore(NewList, ListPtr,
2287 *MF.getMachineMemOperand(MachinePointerInfo(),
2288                                                  MachineMemOperand::MOStore,
2289                                                  PtrTy, PtrAlign));
2290
2291 MI.eraseFromParent();
2292 return true;
2293}
2294
2295bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2296 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2297 // Only legal if we can select immediate forms.
2298 // TODO: Lower this otherwise.
2299 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2300 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2301}
2302
2303bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2304                                         MachineRegisterInfo &MRI,
2305                                         LegalizerHelper &Helper) const {
2306 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2307 // it can be more efficiently lowered to the following sequence that uses
2308 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2309 // registers are cheap.
2310 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2311 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2312 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2313 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2314 //
2315 // For 128 bit vector popcounts, we lower to the following sequence:
2316 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2317 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2318 // uaddlp.4s v0, v0 // v4s32, v2s64
2319 // uaddlp.2d v0, v0 // v2s64
2320 //
2321 // For 64 bit vector popcounts, we lower to the following sequence:
2322 // cnt.8b v0, v0 // v4s16, v2s32
2323 // uaddlp.4h v0, v0 // v4s16, v2s32
2324 // uaddlp.2s v0, v0 // v2s32
2325
2326 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2327 Register Dst = MI.getOperand(0).getReg();
2328 Register Val = MI.getOperand(1).getReg();
2329 LLT Ty = MRI.getType(Val);
2330
2331 LLT i64 = LLT::integer(64);
2332 LLT i32 = LLT::integer(32);
2333 LLT i16 = LLT::integer(16);
2334 LLT i8 = LLT::integer(8);
2335 unsigned Size = Ty.getSizeInBits();
2336
2337 assert(Ty == MRI.getType(Dst) &&
2338 "Expected src and dst to have the same type!");
2339
2340 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2341
2342 auto Split = MIRBuilder.buildUnmerge(i64, Val);
2343 auto CTPOP1 = MIRBuilder.buildCTPOP(i64, Split->getOperand(0));
2344 auto CTPOP2 = MIRBuilder.buildCTPOP(i64, Split->getOperand(1));
2345 auto Add = MIRBuilder.buildAdd(i64, CTPOP1, CTPOP2);
2346
2347 MIRBuilder.buildZExt(Dst, Add);
2348 MI.eraseFromParent();
2349 return true;
2350 }
2351
2352 if (!ST->hasNEON() ||
2353 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2354 // Use generic lowering when custom lowering is not possible.
2355 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2356 Helper.lowerBitCount(MI) ==
2357               LegalizerHelper::LegalizeResult::Legalized;
2358  }
2359
2360 // Pre-conditioning: widen Val up to the nearest vector type.
2361 // s32,s64,v4s16,v2s32 -> v8i8
2362 // v8s16,v4s32,v2s64 -> v16i8
2363 LLT VTy = Size == 128 ? LLT::fixed_vector(16, i8) : LLT::fixed_vector(8, i8);
2364 if (Ty.isScalar()) {
2365 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2366 if (Size == 32) {
2367 Val = MIRBuilder.buildZExt(i64, Val).getReg(0);
2368 }
2369 }
2370 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2371
2372 // Count bits in each byte-sized lane.
2373 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2374
2375 // Sum across lanes.
2376 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2377 Ty.getScalarSizeInBits() != 16) {
2378 LLT Dt = Ty == LLT::fixed_vector(2, i64) ? LLT::fixed_vector(4, i32) : Ty;
2379 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2380 auto Ones = MIRBuilder.buildConstant(VTy, 1);
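    // Taking a dot product of the per-byte counts against an all-ones vector
    // (UDOT) adds each group of four adjacent byte counts into one 32-bit
    // lane, e.g. v16s8 counts collapse to v4s32 partial sums in a single UDOT.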
2381 MachineInstrBuilder Sum;
2382
2383 if (Ty == LLT::fixed_vector(2, i64)) {
2384 auto UDOT =
2385 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2386 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2387 } else if (Ty == LLT::fixed_vector(4, i32)) {
2388 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2389 } else if (Ty == LLT::fixed_vector(2, i32)) {
2390 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2391 } else {
2392 llvm_unreachable("unexpected vector shape");
2393 }
2394
2395 Sum->getOperand(0).setReg(Dst);
2396 MI.eraseFromParent();
2397 return true;
2398 }
2399
2400 Register HSum = CTPOP.getReg(0);
2401 unsigned Opc;
2402 SmallVector<LLT> HAddTys;
2403 if (Ty.isScalar()) {
2404 Opc = Intrinsic::aarch64_neon_uaddlv;
2405 HAddTys.push_back(i32);
2406 } else if (Ty == LLT::fixed_vector(8, i16)) {
2407 Opc = Intrinsic::aarch64_neon_uaddlp;
2408 HAddTys.push_back(LLT::fixed_vector(8, i16));
2409 } else if (Ty == LLT::fixed_vector(4, i32)) {
2410 Opc = Intrinsic::aarch64_neon_uaddlp;
2411 HAddTys.push_back(LLT::fixed_vector(8, i16));
2412 HAddTys.push_back(LLT::fixed_vector(4, i32));
2413 } else if (Ty == LLT::fixed_vector(2, i64)) {
2414 Opc = Intrinsic::aarch64_neon_uaddlp;
2415 HAddTys.push_back(LLT::fixed_vector(8, i16));
2416 HAddTys.push_back(LLT::fixed_vector(4, i32));
2417 HAddTys.push_back(LLT::fixed_vector(2, i64));
2418 } else if (Ty == LLT::fixed_vector(4, i16)) {
2419 Opc = Intrinsic::aarch64_neon_uaddlp;
2420 HAddTys.push_back(LLT::fixed_vector(4, i16));
2421 } else if (Ty == LLT::fixed_vector(2, i32)) {
2422 Opc = Intrinsic::aarch64_neon_uaddlp;
2423 HAddTys.push_back(LLT::fixed_vector(4, i16));
2424 HAddTys.push_back(LLT::fixed_vector(2, i32));
2425 } else
2426 llvm_unreachable("unexpected vector shape");
2427  MachineInstrBuilder UADD;
2428  for (LLT HTy : HAddTys) {
2429 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2430 HSum = UADD.getReg(0);
2431 }
2432
2433 // Post-conditioning.
2434 if (Ty.isScalar() && (Size == 64 || Size == 128))
2435 MIRBuilder.buildZExt(Dst, UADD);
2436 else
2437 UADD->getOperand(0).setReg(Dst);
2438 MI.eraseFromParent();
2439 return true;
2440}
2441
2442bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2443 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2444 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2445 LLT i64 = LLT::integer(64);
2446 auto Addr = MI.getOperand(1).getReg();
2447 auto DesiredI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(2));
2448 auto NewI = MIRBuilder.buildUnmerge({i64, i64}, MI.getOperand(3));
2449 auto DstLo = MRI.createGenericVirtualRegister(i64);
2450 auto DstHi = MRI.createGenericVirtualRegister(i64);
2451
2452 MachineInstrBuilder CAS;
2453 if (ST->hasLSE()) {
2454 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2455 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2456 // the rest of the MIR so we must reassemble the extracted registers into a
2457 // 128-bit known-regclass one with code like this:
2458 //
2459 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2460 // %out = CASP %in1, ...
2461 // %OldLo = G_EXTRACT %out, 0
2462 // %OldHi = G_EXTRACT %out, 64
2463 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2464 unsigned Opcode;
2465 switch (Ordering) {
2466    case AtomicOrdering::Acquire:
2467      Opcode = AArch64::CASPAX;
2468      break;
2469    case AtomicOrdering::Release:
2470      Opcode = AArch64::CASPLX;
2471      break;
2472    case AtomicOrdering::AcquireRelease:
2473    case AtomicOrdering::SequentiallyConsistent:
2474      Opcode = AArch64::CASPALX;
2475 break;
2476 default:
2477 Opcode = AArch64::CASPX;
2478 break;
2479 }
2480
2481 LLT s128 = LLT::scalar(128);
2482 auto CASDst = MRI.createGenericVirtualRegister(s128);
2483 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2484 auto CASNew = MRI.createGenericVirtualRegister(s128);
2485 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2486 .addUse(DesiredI->getOperand(0).getReg())
2487 .addImm(AArch64::sube64)
2488 .addUse(DesiredI->getOperand(1).getReg())
2489 .addImm(AArch64::subo64);
2490 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2491 .addUse(NewI->getOperand(0).getReg())
2492 .addImm(AArch64::sube64)
2493 .addUse(NewI->getOperand(1).getReg())
2494 .addImm(AArch64::subo64);
2495
2496 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2497
2498 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2499 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2500 } else {
2501 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2502 // can take arbitrary registers so it just has the normal GPR64 operands the
2503 // rest of AArch64 is expecting.
2504 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2505 unsigned Opcode;
2506 switch (Ordering) {
2507    case AtomicOrdering::Acquire:
2508      Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2509      break;
2510    case AtomicOrdering::Release:
2511      Opcode = AArch64::CMP_SWAP_128_RELEASE;
2512      break;
2513    case AtomicOrdering::AcquireRelease:
2514    case AtomicOrdering::SequentiallyConsistent:
2515      Opcode = AArch64::CMP_SWAP_128;
2516 break;
2517 default:
2518 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2519 break;
2520 }
2521
2522 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2523 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2524 {Addr, DesiredI->getOperand(0),
2525 DesiredI->getOperand(1), NewI->getOperand(0),
2526 NewI->getOperand(1)});
2527 }
2528
2529 CAS.cloneMemRefs(MI);
2530 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2531 *MRI.getTargetRegisterInfo(),
2532 *ST->getRegBankInfo());
2533
2534 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2535 MI.eraseFromParent();
2536 return true;
2537}
2538
2539bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2540 LegalizerHelper &Helper) const {
2541 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2542 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2543 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
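  // AArch64 has no count-trailing-zeros instruction, but RBIT + CLZ is
  // equivalent since cttz(x) == ctlz(bitreverse(x)).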
2544 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2545 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2546 MI.eraseFromParent();
2547 return true;
2548}
2549
2550bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2551 LegalizerHelper &Helper) const {
2552 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2553
2554 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2555 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2556 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2557 // the instruction).
2558 auto &Value = MI.getOperand(1);
2559 Register ExtValueReg =
2560 MIRBuilder.buildAnyExt(LLT::integer(64), Value).getReg(0);
2561 Value.setReg(ExtValueReg);
2562 return true;
2563 }
2564
2565 return false;
2566}
2567
2568bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2569 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2570 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2571  auto VRegAndVal =
2572      getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
2573 if (VRegAndVal)
2574 return true;
2575 LLT VecTy = MRI.getType(Element->getVectorReg());
2576 if (VecTy.isScalableVector())
2577 return true;
2578 return Helper.lowerExtractInsertVectorElt(MI) !=
2579         LegalizerHelper::LegalizeResult::UnableToLegalize;
2580}
2581
2582bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2583 MachineInstr &MI, LegalizerHelper &Helper) const {
2584 MachineFunction &MF = *MI.getParent()->getParent();
2585 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2586 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2587
2588 // If stack probing is not enabled for this function, use the default
2589 // lowering.
2590 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2591 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2592 "inline-asm") {
2593 Helper.lowerDynStackAlloc(MI);
2594 return true;
2595 }
2596
2597 Register Dst = MI.getOperand(0).getReg();
2598 Register AllocSize = MI.getOperand(1).getReg();
2599 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2600
2601 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2602 "Unexpected type for dynamic alloca");
2603 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2604 "Unexpected type for dynamic alloca");
2605
2606 LLT PtrTy = MRI.getType(Dst);
2607  Register SPReg =
2608      Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2609 Register SPTmp =
2610 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2611 auto NewMI =
2612 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2613 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2614 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2615 MIRBuilder.buildCopy(Dst, SPTmp);
2616
2617 MI.eraseFromParent();
2618 return true;
2619}
2620
2621bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2622 LegalizerHelper &Helper) const {
2623 MachineIRBuilder &MIB = Helper.MIRBuilder;
2624 auto &AddrVal = MI.getOperand(0);
2625
2626 int64_t IsWrite = MI.getOperand(1).getImm();
2627 int64_t Locality = MI.getOperand(2).getImm();
2628 int64_t IsData = MI.getOperand(3).getImm();
2629
2630 bool IsStream = Locality == 0;
2631 if (Locality != 0) {
2632 assert(Locality <= 3 && "Prefetch locality out-of-range");
2633    // The llvm.prefetch locality degree is the inverse of the target cache
2634    // level: locality 3 means "closest" (L1), and the PRFM encoding starts at
2635    // 0 for L1, so flip the value.
2636 Locality = 3 - Locality;
2637 }
2638
2639 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
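  // E.g. a read data prefetch with locality 1 becomes Locality = 3 - 1 = 2,
  // giving PrfOp = (0 << 4) | (0 << 3) | (2 << 1) | 0 = 4, i.e. PLDL3KEEP.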
2640
2641 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2642 MI.eraseFromParent();
2643 return true;
2644}
2645
2646bool AArch64LegalizerInfo::legalizeConcatVectors(
2647    MachineInstr &MI, MachineRegisterInfo &MRI,
2648    MachineIRBuilder &MIRBuilder) const {
2649 // Widen sub-byte element vectors to byte-sized elements before concatenating.
2650 // This is analogous to SDAG's integer type promotion for sub-byte types.
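  // For example, concatenating two v8s4 sources into a v16s4 result becomes:
  // G_ANYEXT each source to v8s8, G_CONCAT_VECTORS into v16s8, then G_TRUNC
  // back to v16s4.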
2651  GConcatVectors &Concat = cast<GConcatVectors>(MI);
2652  Register DstReg = Concat.getReg(0);
2653 LLT DstTy = MRI.getType(DstReg);
2654 assert(DstTy.getScalarSizeInBits() < 8 && "Expected dst ty to be < 8b");
2655
2656 unsigned WideEltSize =
2657 std::max(8u, (unsigned)PowerOf2Ceil(DstTy.getScalarSizeInBits()));
2658 LLT SrcTy = MRI.getType(Concat.getSourceReg(0));
2659 LLT WideSrcTy = SrcTy.changeElementSize(WideEltSize);
2660 LLT WideDstTy = DstTy.changeElementSize(WideEltSize);
2661
2662 SmallVector<Register> WideSrcs;
2663 for (unsigned I = 0; I < Concat.getNumSources(); ++I) {
2664 auto Wide = MIRBuilder.buildAnyExt(WideSrcTy, Concat.getSourceReg(I));
2665 WideSrcs.push_back(Wide.getReg(0));
2666 }
2667
2668 auto WideConcat = MIRBuilder.buildConcatVectors(WideDstTy, WideSrcs);
2669 MIRBuilder.buildTrunc(DstReg, WideConcat);
2670 MI.eraseFromParent();
2671 return true;
2672}
2673
2674bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2675 MachineIRBuilder &MIRBuilder,
2676 MachineRegisterInfo &MRI) const {
2677 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2678
2679 // This function legalizes f64 -> bf16 and f64 -> f16 truncations via f64 ->
2680  // f32 G_FPTRUNC_ODD and f32 -> [b]f16 G_FPTRUNC. Rounding the intermediate
2681  // f32 to odd avoids the double-rounding error that two ordinary G_FPTRUNC
2682  // steps could introduce.
2683
2684 if (DstTy.isBFloat16() && SrcTy.isFloat64()) {
2685 auto Mid =
2686 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {LLT::float32()}, {Src});
2687 MIRBuilder.buildInstr(AArch64::G_FPTRUNC, {Dst}, {Mid});
2688 MI.eraseFromParent();
2689 return true;
2690 }
2691
2692 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2693 "Expected a power of 2 elements");
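  // E.g. an illustrative v4f64 -> v4f16 truncation is handled below by
  // unmerging into two v2f64 halves, applying G_FPTRUNC_ODD to get two v2f32,
  // concatenating them into a v4f32, and finishing with one G_FPTRUNC to v4f16.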
2694
2695  // We must mutate the types here as G_FPTRUNC may operate on either an IEEE
2696  // floating-point type or a bfloat16.
2697 LLT v2s16 = DstTy.changeElementCount(2);
2698 LLT v4s16 = DstTy.changeElementCount(4);
2699 LLT v2s32 = SrcTy.changeElementCount(2).changeElementSize(32);
2700 LLT v4s32 = SrcTy.changeElementCount(4).changeElementSize(32);
2701 LLT v2s64 = SrcTy.changeElementCount(2);
2702
2703 SmallVector<Register> RegsToUnmergeTo;
2704 SmallVector<Register> TruncOddDstRegs;
2705 SmallVector<Register> RegsToMerge;
2706
2707 unsigned ElemCount = SrcTy.getNumElements();
2708
2709 // Find the biggest size chunks we can work with
2710 int StepSize = ElemCount % 4 ? 2 : 4;
2711
2712 // If we have a power of 2 greater than 2, we need to first unmerge into
2713 // enough pieces
2714 if (ElemCount <= 2)
2715 RegsToUnmergeTo.push_back(Src);
2716 else {
2717 for (unsigned i = 0; i < ElemCount / 2; ++i)
2718 RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));
2719
2720 MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);
2721 }
2722
2723 // Create all of the round-to-odd instructions and store them
2724 for (auto SrcReg : RegsToUnmergeTo) {
2725 Register Mid =
2726 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
2727 .getReg(0);
2728 TruncOddDstRegs.push_back(Mid);
2729 }
2730
2731 // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
2732 // truncate 2s32 to 2s16.
2733 unsigned Index = 0;
2734 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2735 if (StepSize == 4) {
2736 Register ConcatDst =
2737 MIRBuilder
2738              .buildInstr(TargetOpcode::G_CONCAT_VECTORS,
2739                          {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2740 .getReg(0);
2741
2742 RegsToMerge.push_back(
2743 MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));
2744 } else {
2745 RegsToMerge.push_back(
2746 MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));
2747 }
2748 }
2749
2750 // If there is only one register, replace the destination
2751 if (RegsToMerge.size() == 1) {
2752 MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());
2753 MI.eraseFromParent();
2754 return true;
2755 }
2756
2757 // Merge the rest of the instructions & replace the register
2758 Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);
2759 MRI.replaceRegWith(Dst, Fin);
2760 MI.eraseFromParent();
2761 return true;
2762}
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
This file declares the targeting of the Machinelegalizer class for AArch64.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Error unsupported(const char *Str, const Triple &T)
Definition MachO.cpp:77
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
Interface for Targets to specify which operations they can successfully select and how the others sho...
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ppc ctr loops verify
if(PassOpts->AAPipeline)
static constexpr MCPhysReg SPReg
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static constexpr int Concat[]
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
static constexpr LLT float128()
Get a 128-bit IEEE quad value.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr ElementCount getElementCount() const
LLT divide(int Factor) const
Return a type that is Factor times smaller.
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
bool isFloat64() const
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
LLVM_ABI void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet & widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
LegalizeRuleSet & unsupported()
The instruction is unsupported.
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
LegalizeRuleSet & bitcastIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
The specified type index is coerced if predicate is true.
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & widenScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Widen the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & alwaysLegal()
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition Value.h:75
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
LLVM_ABI LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the ...
LLVM_ABI LegalityPredicate isPointerVector(unsigned TypeIdx)
True iff the specified type index is a vector of pointers (with any address space).
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LLVM_ABI LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than the second type index.
LLVM_ABI LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering of Ordering or stronger.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's wider than the given size.
LLVM_ABI LegalityPredicate scalarNarrowerThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's narrower than the given size.
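A hedged sketch of how these predicates compose inside a legalization rule. It assumes the context of a LegalizerInfo constructor such as the one above, where getActionDefinitionsBuilder, the using-directives and the LLT constants are in scope; the opcode and type choices are illustrative, not the actual AArch64 rules.
getActionDefinitionsBuilder(G_AND)
    // Legal when the result type is one of the listed types.
    .legalIf(typeInSet(0, {s32, s64, v16s8, v8s16, v4s32, v2s64}))
    // Split vectors of pointers into their elements.
    .scalarizeIf(isPointerVector(0), 0)
    // Widen any scalar narrower than 32 bits up to s32.
    .widenScalarIf(scalarNarrowerThan(0, 32), changeTo(0, s32));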
@ Bitcast
Perform the operation on a different, but equivalently sized type.
LLVM_ABI LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of 2 elements.
LLVM_ABI LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LLVM_ABI LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx)
Keep the same scalar or element type as the given type index.
LLVM_ABI LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
LLVM_ABI LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromTypeIdx.
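A hedged sketch pairing the predicates with the mutations above: the predicate decides when an action fires, the mutation picks the type the legalizer should produce. As before, the opcode and sizes are placeholders rather than the real AArch64 rules, and the constructor context above is assumed.
getActionDefinitionsBuilder(G_ADD)
    .legalFor({s32, s64, v4s32, v2s64})
    // Round vector element counts up to the next power of two.
    .moreElementsIf(isVector(0), moreElementsToNextPow2(0))
    // Widen narrow scalars/elements to the next power of two, at least 32 bits.
    .widenScalarIf(scalarNarrowerThan(0, 32), widenScalarOrEltToNextPow2(0, 32))
    // Narrow anything wider than 64 bits back down to s64.
    .narrowScalarIf(scalarWiderThan(0, 64), changeTo(0, s64));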
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
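A hedged sketch of these matchers in use: detect a G_PTR_ADD whose offset is a constant, binding the base register and the offset value by walking the virtual-register def chain through MRI. The helper name is hypothetical.
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

static bool matchConstantPtrAdd(llvm::MachineInstr &MI,
                                const llvm::MachineRegisterInfo &MRI,
                                llvm::Register &Base, llvm::APInt &Offset) {
  using namespace llvm;
  using namespace llvm::MIPatternMatch;
  // Matches: %dst = G_PTR_ADD %Base, G_CONSTANT Offset
  return mi_match(MI.getOperand(0).getReg(), MRI,
                  m_GPtrAdd(m_Reg(Base), m_ICst(Offset)));
}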
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:156
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1527
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
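A hedged sketch of the power-of-two helpers above, with arbitrary values.
#include "llvm/Support/MathExtras.h"
#include <cassert>

static void powerOfTwoExamples() {
  assert(llvm::isPowerOf2_32(64u));        // 64 == 1 << 6
  assert(!llvm::isPowerOf2_32(63u));       // 63 is not a power of two
  assert(llvm::isPowerOf2_64(1ULL << 40)); // also works on 64-bit values
  assert(llvm::PowerOf2Ceil(20) == 32);    // rounds up to the next power of two
  assert(llvm::PowerOf2Ceil(32) == 32);    // already a power of two
}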
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
DWARFExpression::Operation Op
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
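A hedged sketch of getIConstantVRegValWithLookThrough: read an operand that is expected to be a compile-time constant, looking through COPY and extension/truncation instructions on the way to the defining G_CONSTANT. The helper name and the operand index are illustrative only.
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include <optional>

static std::optional<int64_t>
getConstantShiftAmount(llvm::MachineInstr &MI,
                       const llvm::MachineRegisterInfo &MRI) {
  llvm::Register AmtReg = MI.getOperand(2).getReg();
  if (auto VRegAndVal = llvm::getIConstantVRegValWithLookThrough(AmtReg, MRI))
    return VRegAndVal->Value.getSExtValue(); // APInt -> int64_t
  return std::nullopt;
}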
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
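A hedged sketch of the alignment helpers above, with arbitrary values.
#include "llvm/Support/Alignment.h"
#include <cstdint>

static void alignmentExamples() {
  llvm::Align A = llvm::assumeAligned(0);  // 0 is promoted to 1, so A == Align(1)
  llvm::Align B = llvm::assumeAligned(16); // a known 16-byte alignment
  uint64_t Size = llvm::alignTo(20, B);    // 20 rounded up to 32
  unsigned Shift = llvm::Log2(B);          // log2(16) == 4
  (void)A; (void)Size; (void)Shift;
}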
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...