LLVM 23.0.0git
LegalizeVectorTypes.cpp
Go to the documentation of this file.
1//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file performs vector type splitting and scalarization for LegalizeTypes.
10// Scalarization is the act of changing a computation in an illegal one-element
11// vector type to be a computation in its scalar element type. For example,
12// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
13// as a base case when scalarizing vector arithmetic like <4 x f32>, which
14// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
15// types.
16// Splitting is the act of changing a computation in an invalid vector type to
17// be a computation in two vectors of half the size. For example, implementing
18// <128 x f32> operations in terms of two <64 x f32> operations.
19//
20//===----------------------------------------------------------------------===//
21
22#include "LegalizeTypes.h"
27#include "llvm/IR/DataLayout.h"
31#include <numeric>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "legalize-types"
36
37//===----------------------------------------------------------------------===//
38// Result Vector Scalarization: <1 x ty> -> ty.
39//===----------------------------------------------------------------------===//
40
// Dispatcher for result scalarization: routes result value ResNo of N
// (<1 x ty> -> ty) to the matching ScalarizeVecRes_* helper, then records
// the scalar replacement via SetScalarizedVector.
// NOTE(review): this listing was recovered from a doxygen render and the
// extraction dropped several original source lines (mostly `case` labels).
// Each gap is flagged inline below; restore the missing lines from upstream
// LLVM before attempting to compile this file.
41void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
42 LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
43 N->dump(&DAG));
44 SDValue R = SDValue();
45
46 switch (N->getOpcode()) {
47 default:
48#ifndef NDEBUG
49 dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
50 N->dump(&DAG);
51 dbgs() << "\n";
52#endif
53 report_fatal_error("Do not know how to scalarize the result of this "
54 "operator!\n");
55
// NOTE(review): original lines 56-57 lost in extraction — presumably the
// `case ISD::LOOP_DEPENDENCE_WAR_MASK:` / `case ISD::LOOP_DEPENDENCE_RAW_MASK:`
// labels for the handler below; confirm against upstream.
58 R = ScalarizeVecRes_LOOP_DEPENDENCE_MASK(N);
59 break;
60 case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
61 case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
62 case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
63 case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
64 case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
// NOTE(review): original line 65 lost — presumably
// `case ISD::CONVERT_FROM_ARBITRARY_FP:`; confirm against upstream.
66 R = ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(N);
67 break;
68 case ISD::AssertZext:
69 case ISD::AssertSext:
70 case ISD::FPOWI:
// NOTE(review): original line 71 lost — one more case label for the
// extra-input handler below; confirm against upstream.
72 R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
73 break;
74 case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
// NOTE(review): original line 75 lost — presumably `case ISD::ATOMIC_LOAD:`
// given the cast below; confirm against upstream.
76 R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
77 break;
78 case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
79 case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
80 case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
81 case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
82 case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
83 case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
84 case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
85 case ISD::POISON:
86 case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
87 case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
88 case ISD::IS_FPCLASS: R = ScalarizeVecRes_IS_FPCLASS(N); break;
// NOTE(review): original lines 89-91 lost — presumably the
// ANY/SIGN/ZERO_EXTEND_VECTOR_INREG case labels (cf. the switch inside
// ScalarizeVecRes_VecInregOp); confirm against upstream.
92 R = ScalarizeVecRes_VecInregOp(N);
93 break;
94 case ISD::ABS:
// NOTE(review): original line 95 lost — an additional unary case label;
// confirm against upstream.
96 case ISD::ANY_EXTEND:
97 case ISD::BITREVERSE:
98 case ISD::BSWAP:
99 case ISD::CTLZ:
// NOTE(review): original line 100 lost — presumably
// `case ISD::CTLZ_ZERO_UNDEF:`; confirm against upstream.
101 case ISD::CTPOP:
102 case ISD::CTTZ:
// NOTE(review): original line 103 lost — presumably
// `case ISD::CTTZ_ZERO_UNDEF:`; confirm against upstream.
104 case ISD::FABS:
105 case ISD::FACOS:
106 case ISD::FASIN:
107 case ISD::FATAN:
108 case ISD::FCEIL:
109 case ISD::FCOS:
110 case ISD::FCOSH:
111 case ISD::FEXP:
112 case ISD::FEXP2:
113 case ISD::FEXP10:
114 case ISD::FFLOOR:
115 case ISD::FLOG:
116 case ISD::FLOG10:
117 case ISD::FLOG2:
118 case ISD::FNEARBYINT:
119 case ISD::FNEG:
120 case ISD::FREEZE:
121 case ISD::ARITH_FENCE:
122 case ISD::FP_EXTEND:
123 case ISD::FP_TO_SINT:
124 case ISD::FP_TO_UINT:
125 case ISD::FRINT:
126 case ISD::LRINT:
127 case ISD::LLRINT:
128 case ISD::FROUND:
129 case ISD::FROUNDEVEN:
130 case ISD::LROUND:
131 case ISD::LLROUND:
132 case ISD::FSIN:
133 case ISD::FSINH:
134 case ISD::FSQRT:
135 case ISD::FTAN:
136 case ISD::FTANH:
137 case ISD::FTRUNC:
138 case ISD::SIGN_EXTEND:
139 case ISD::SINT_TO_FP:
140 case ISD::TRUNCATE:
141 case ISD::UINT_TO_FP:
142 case ISD::ZERO_EXTEND:
// NOTE(review): original line 143 lost — one more unary case label;
// confirm against upstream.
144 R = ScalarizeVecRes_UnaryOp(N);
145 break;
// NOTE(review): original line 146 lost — presumably
// `case ISD::ADDRSPACECAST:`; confirm against upstream.
147 R = ScalarizeVecRes_ADDRSPACECAST(N);
148 break;
149 case ISD::FMODF:
150 case ISD::FFREXP:
151 case ISD::FSINCOS:
152 case ISD::FSINCOSPI:
153 R = ScalarizeVecRes_UnaryOpWithTwoResults(N, ResNo);
154 break;
155 case ISD::ADD:
156 case ISD::AND:
157 case ISD::AVGCEILS:
158 case ISD::AVGCEILU:
159 case ISD::AVGFLOORS:
160 case ISD::AVGFLOORU:
161 case ISD::FADD:
162 case ISD::FCOPYSIGN:
163 case ISD::FDIV:
164 case ISD::FMUL:
165 case ISD::FMINNUM:
166 case ISD::FMAXNUM:
// NOTE(review): original lines 167-168 lost — two more binop case labels;
// confirm against upstream.
169 case ISD::FMINIMUM:
170 case ISD::FMAXIMUM:
171 case ISD::FMINIMUMNUM:
172 case ISD::FMAXIMUMNUM:
173 case ISD::FLDEXP:
174 case ISD::ABDS:
175 case ISD::ABDU:
176 case ISD::SMIN:
177 case ISD::SMAX:
178 case ISD::UMIN:
179 case ISD::UMAX:
180
181 case ISD::SADDSAT:
182 case ISD::UADDSAT:
183 case ISD::SSUBSAT:
184 case ISD::USUBSAT:
185 case ISD::SSHLSAT:
186 case ISD::USHLSAT:
187
188 case ISD::FPOW:
189 case ISD::FATAN2:
190 case ISD::FREM:
191 case ISD::FSUB:
192 case ISD::MUL:
193 case ISD::MULHS:
194 case ISD::MULHU:
195 case ISD::OR:
196 case ISD::SDIV:
197 case ISD::SREM:
198 case ISD::SUB:
199 case ISD::UDIV:
200 case ISD::UREM:
201 case ISD::XOR:
202 case ISD::SHL:
203 case ISD::SRA:
204 case ISD::SRL:
205 case ISD::ROTL:
206 case ISD::ROTR:
207 case ISD::CLMUL:
208 case ISD::CLMULR:
209 case ISD::CLMULH:
210 R = ScalarizeVecRes_BinOp(N);
211 break;
212
213 case ISD::MASKED_UDIV:
214 case ISD::MASKED_SDIV:
215 case ISD::MASKED_UREM:
216 case ISD::MASKED_SREM:
217 R = ScalarizeVecRes_MaskedBinOp(N);
218 break;
219
220 case ISD::SCMP:
221 case ISD::UCMP:
222 R = ScalarizeVecRes_CMP(N);
223 break;
224
225 case ISD::FMA:
226 case ISD::FSHL:
227 case ISD::FSHR:
228 R = ScalarizeVecRes_TernaryOp(N);
229 break;
230
// Expands to one STRICT_* case label per constrained FP op via the .def file.
231#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
232 case ISD::STRICT_##DAGN:
233#include "llvm/IR/ConstrainedOps.def"
234 R = ScalarizeVecRes_StrictFPOp(N);
235 break;
236
// NOTE(review): original lines 237-238 lost — presumably
// `case ISD::FP_TO_SINT_SAT:` / `case ISD::FP_TO_UINT_SAT:`; confirm
// against upstream.
239 R = ScalarizeVecRes_FP_TO_XINT_SAT(N);
240 break;
241
242 case ISD::UADDO:
243 case ISD::SADDO:
244 case ISD::USUBO:
245 case ISD::SSUBO:
246 case ISD::UMULO:
247 case ISD::SMULO:
248 R = ScalarizeVecRes_OverflowOp(N, ResNo);
249 break;
250 case ISD::SMULFIX:
251 case ISD::SMULFIXSAT:
252 case ISD::UMULFIX:
253 case ISD::UMULFIXSAT:
254 case ISD::SDIVFIX:
255 case ISD::SDIVFIXSAT:
256 case ISD::UDIVFIX:
257 case ISD::UDIVFIXSAT:
258 R = ScalarizeVecRes_FIX(N);
259 break;
260 }
261
262 // If R is null, the sub-method took care of registering the result.
263 if (R.getNode())
264 SetScalarizedVector(SDValue(N, ResNo), R);
265}
266
267SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
268 SDValue LHS = GetScalarizedVector(N->getOperand(0));
269 SDValue RHS = GetScalarizedVector(N->getOperand(1));
270 return DAG.getNode(N->getOpcode(), SDLoc(N),
271 LHS.getValueType(), LHS, RHS, N->getFlags());
272}
273
274SDValue DAGTypeLegalizer::ScalarizeVecRes_MaskedBinOp(SDNode *N) {
275 SDLoc DL(N);
276 SDValue LHS = GetScalarizedVector(N->getOperand(0));
277 SDValue RHS = GetScalarizedVector(N->getOperand(1));
278 SDValue Mask = N->getOperand(2);
279 EVT MaskVT = Mask.getValueType();
280 // The vselect result and input vectors need scalarizing, but it's
281 // not a given that the mask does. For instance, in AVX512 v1i1 is legal.
282 // See the similar logic in ScalarizeVecRes_SETCC.
283 if (getTypeAction(MaskVT) == TargetLowering::TypeScalarizeVector)
284 Mask = GetScalarizedVector(Mask);
285 else
286 Mask = DAG.getExtractVectorElt(DL, MaskVT.getVectorElementType(), Mask, 0);
287 // Vectors may have a different boolean contents to scalars, so truncate to i1
288 // and let type legalization promote appropriately.
289 Mask = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Mask);
290 // Masked binary ops don't have UB on disabled lanes but produce poison, so
291 // use 1 as the divisor to avoid division by zero and overflow.
292 SDValue Divisor = DAG.getSelect(DL, LHS.getValueType(), Mask, RHS,
293 DAG.getConstant(1, DL, LHS.getValueType()));
294 return DAG.getNode(ISD::getUnmaskedBinOpOpcode(N->getOpcode()), DL,
295 LHS.getValueType(), LHS, Divisor);
296}
297
298SDValue DAGTypeLegalizer::ScalarizeVecRes_CMP(SDNode *N) {
299 SDLoc DL(N);
300
301 SDValue LHS = N->getOperand(0);
302 SDValue RHS = N->getOperand(1);
303 if (getTypeAction(LHS.getValueType()) ==
305 LHS = GetScalarizedVector(LHS);
306 RHS = GetScalarizedVector(RHS);
307 } else {
308 EVT VT = LHS.getValueType().getVectorElementType();
309 LHS = DAG.getExtractVectorElt(DL, VT, LHS, 0);
310 RHS = DAG.getExtractVectorElt(DL, VT, RHS, 0);
311 }
312
313 return DAG.getNode(N->getOpcode(), SDLoc(N),
314 N->getValueType(0).getVectorElementType(), LHS, RHS);
315}
316
317SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
318 SDValue Op0 = GetScalarizedVector(N->getOperand(0));
319 SDValue Op1 = GetScalarizedVector(N->getOperand(1));
320 SDValue Op2 = GetScalarizedVector(N->getOperand(2));
321 return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
322 Op2, N->getFlags());
323}
324
325SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
326 SDValue Op0 = GetScalarizedVector(N->getOperand(0));
327 SDValue Op1 = GetScalarizedVector(N->getOperand(1));
328 SDValue Op2 = N->getOperand(2);
329 return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
330 Op2, N->getFlags());
331}
332
334DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithTwoResults(SDNode *N,
335 unsigned ResNo) {
336 assert(N->getValueType(0).getVectorNumElements() == 1 &&
337 "Unexpected vector type!");
338 SDValue Elt = GetScalarizedVector(N->getOperand(0));
339
340 EVT VT0 = N->getValueType(0);
341 EVT VT1 = N->getValueType(1);
342 SDLoc dl(N);
343
344 SDNode *ScalarNode =
345 DAG.getNode(N->getOpcode(), dl,
346 {VT0.getScalarType(), VT1.getScalarType()}, Elt)
347 .getNode();
348
349 // Replace the other vector result not being explicitly scalarized here.
350 unsigned OtherNo = 1 - ResNo;
351 EVT OtherVT = N->getValueType(OtherNo);
352 if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) {
353 SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo));
354 } else {
355 SDValue OtherVal = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, OtherVT,
356 SDValue(ScalarNode, OtherNo));
357 ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
358 }
359
360 return SDValue(ScalarNode, ResNo);
361}
362
363SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
364 EVT VT = N->getValueType(0).getVectorElementType();
365 unsigned NumOpers = N->getNumOperands();
366 SDValue Chain = N->getOperand(0);
367 EVT ValueVTs[] = {VT, MVT::Other};
368 SDLoc dl(N);
369
370 SmallVector<SDValue, 4> Opers(NumOpers);
371
372 // The Chain is the first operand.
373 Opers[0] = Chain;
374
375 // Now process the remaining operands.
376 for (unsigned i = 1; i < NumOpers; ++i) {
377 SDValue Oper = N->getOperand(i);
378 EVT OperVT = Oper.getValueType();
379
380 if (OperVT.isVector()) {
381 if (getTypeAction(OperVT) == TargetLowering::TypeScalarizeVector)
382 Oper = GetScalarizedVector(Oper);
383 else
384 Oper =
385 DAG.getExtractVectorElt(dl, OperVT.getVectorElementType(), Oper, 0);
386 }
387
388 Opers[i] = Oper;
389 }
390
391 SDValue Result = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(ValueVTs),
392 Opers, N->getFlags());
393
394 // Legalize the chain result - switch anything that used the old chain to
395 // use the new one.
396 ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
397 return Result;
398}
399
400SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N,
401 unsigned ResNo) {
402 SDLoc DL(N);
403 EVT ResVT = N->getValueType(0);
404 EVT OvVT = N->getValueType(1);
405
406 SDValue ScalarLHS, ScalarRHS;
407 if (getTypeAction(ResVT) == TargetLowering::TypeScalarizeVector) {
408 ScalarLHS = GetScalarizedVector(N->getOperand(0));
409 ScalarRHS = GetScalarizedVector(N->getOperand(1));
410 } else {
411 SmallVector<SDValue, 1> ElemsLHS, ElemsRHS;
412 DAG.ExtractVectorElements(N->getOperand(0), ElemsLHS);
413 DAG.ExtractVectorElements(N->getOperand(1), ElemsRHS);
414 ScalarLHS = ElemsLHS[0];
415 ScalarRHS = ElemsRHS[0];
416 }
417
418 SDVTList ScalarVTs = DAG.getVTList(
420 SDNode *ScalarNode = DAG.getNode(N->getOpcode(), DL, ScalarVTs,
421 {ScalarLHS, ScalarRHS}, N->getFlags())
422 .getNode();
423
424 // Replace the other vector result not being explicitly scalarized here.
425 unsigned OtherNo = 1 - ResNo;
426 EVT OtherVT = N->getValueType(OtherNo);
427 if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) {
428 SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo));
429 } else {
430 SDValue OtherVal = DAG.getNode(
431 ISD::SCALAR_TO_VECTOR, DL, OtherVT, SDValue(ScalarNode, OtherNo));
432 ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
433 }
434
435 return SDValue(ScalarNode, ResNo);
436}
437
438SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
439 unsigned ResNo) {
440 SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
441 return GetScalarizedVector(Op);
442}
443
444SDValue DAGTypeLegalizer::ScalarizeVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
445 SDLoc DL(N);
446 SDValue SourceValue = N->getOperand(0);
447 SDValue SinkValue = N->getOperand(1);
448 SDValue EltSizeInBytes = N->getOperand(2);
449 SDValue LaneOffset = N->getOperand(3);
450
451 EVT PtrVT = SourceValue->getValueType(0);
452 bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;
453
454 // Take the difference between the pointers and divided by the element size,
455 // to see how many lanes separate them.
456 SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue);
457 if (IsReadAfterWrite)
458 Diff = DAG.getNode(ISD::ABS, DL, PtrVT, Diff);
459 Diff = DAG.getNode(ISD::SDIV, DL, PtrVT, Diff, EltSizeInBytes);
460
461 // The pointers do not alias if:
462 // * Diff <= 0 || LaneOffset < Diff (WAR_MASK)
463 // * Diff == 0 || LaneOffset < abs(Diff) (RAW_MASK)
464 // Note: If LaneOffset is zero, both cases will fold to "true".
465 EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
466 Diff.getValueType());
467 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
468 SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
469 IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
470 return DAG.getNode(ISD::OR, DL, CmpVT, Cmp,
471 DAG.getSetCC(DL, CmpVT, LaneOffset, Diff, ISD::SETULT));
472}
473
474SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
475 SDValue Op = N->getOperand(0);
476 if (getTypeAction(Op.getValueType()) == TargetLowering::TypeScalarizeVector)
477 Op = GetScalarizedVector(Op);
478 EVT NewVT = N->getValueType(0).getVectorElementType();
479 return DAG.getNode(ISD::BITCAST, SDLoc(N),
480 NewVT, Op);
481}
482
483SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
484 EVT EltVT = N->getValueType(0).getVectorElementType();
485 SDValue InOp = N->getOperand(0);
486 // The BUILD_VECTOR operands may be of wider element types and
487 // we may need to truncate them back to the requested return type.
488 if (EltVT.isInteger())
489 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
490 return InOp;
491}
492
493SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
494 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
495 N->getValueType(0).getVectorElementType(),
496 N->getOperand(0), N->getOperand(1));
497}
498
499SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
500 SDLoc DL(N);
501 SDValue Op = N->getOperand(0);
502 EVT OpVT = Op.getValueType();
503 // The result needs scalarizing, but it's not a given that the source does.
504 // See similar logic in ScalarizeVecRes_UnaryOp.
505 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
506 Op = GetScalarizedVector(Op);
507 } else {
508 EVT VT = OpVT.getVectorElementType();
509 Op = DAG.getExtractVectorElt(DL, VT, Op, 0);
510 }
511 return DAG.getNode(ISD::FP_ROUND, DL,
512 N->getValueType(0).getVectorElementType(), Op,
513 N->getOperand(1));
514}
515
516SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N) {
517 SDLoc DL(N);
518 SDValue Op = N->getOperand(0);
519 EVT OpVT = Op.getValueType();
520 // The result needs scalarizing, but it's not a given that the source does.
521 // See similar logic in ScalarizeVecRes_UnaryOp.
522 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
523 Op = GetScalarizedVector(Op);
524 } else {
525 EVT VT = OpVT.getVectorElementType();
526 Op = DAG.getExtractVectorElt(DL, VT, Op, 0);
527 }
528 return DAG.getNode(ISD::CONVERT_FROM_ARBITRARY_FP, DL,
529 N->getValueType(0).getVectorElementType(), Op,
530 N->getOperand(1));
531}
532
533SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N) {
534 SDValue Op = GetScalarizedVector(N->getOperand(0));
535 return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
536 N->getOperand(1));
537}
538
539SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
540 // The value to insert may have a wider type than the vector element type,
541 // so be sure to truncate it to the element type if necessary.
542 SDValue Op = N->getOperand(1);
543 EVT EltVT = N->getValueType(0).getVectorElementType();
544 if (Op.getValueType() != EltVT)
545 // FIXME: Can this happen for floating point types?
546 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op);
547 return Op;
548}
549
550SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
551 SDValue Result = DAG.getAtomicLoad(
552 N->getExtensionType(), SDLoc(N), N->getMemoryVT().getVectorElementType(),
553 N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(),
554 N->getMemOperand());
555
556 // Legalize the chain result - switch anything that used the old chain to
557 // use the new one.
558 ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
559 return Result;
560}
561
562SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
563 assert(N->isUnindexed() && "Indexed vector load?");
564
565 SDValue Result = DAG.getLoad(
566 ISD::UNINDEXED, N->getExtensionType(),
567 N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(),
568 N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()),
569 N->getPointerInfo(), N->getMemoryVT().getVectorElementType(),
570 N->getBaseAlign(), N->getMemOperand()->getFlags(), N->getAAInfo());
571
572 // Legalize the chain result - switch anything that used the old chain to
573 // use the new one.
574 ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
575 return Result;
576}
577
578SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
579 // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
580 EVT DestVT = N->getValueType(0).getVectorElementType();
581 SDValue Op = N->getOperand(0);
582 EVT OpVT = Op.getValueType();
583 SDLoc DL(N);
584 // The result needs scalarizing, but it's not a given that the source does.
585 // This is a workaround for targets where it's impossible to scalarize the
586 // result of a conversion, because the source type is legal.
587 // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
588 // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
589 // legal and was not scalarized.
590 // See the similar logic in ScalarizeVecRes_SETCC
591 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
592 Op = GetScalarizedVector(Op);
593 } else {
594 EVT VT = OpVT.getVectorElementType();
595 Op = DAG.getExtractVectorElt(DL, VT, Op, 0);
596 }
597 return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags());
598}
599
600SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
601 EVT EltVT = N->getValueType(0).getVectorElementType();
602 EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
603 SDValue LHS = GetScalarizedVector(N->getOperand(0));
604 return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT,
605 LHS, DAG.getValueType(ExtVT));
606}
607
608SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
609 SDLoc DL(N);
610 SDValue Op = N->getOperand(0);
611
612 EVT OpVT = Op.getValueType();
613 EVT OpEltVT = OpVT.getVectorElementType();
614 EVT EltVT = N->getValueType(0).getVectorElementType();
615
616 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
617 Op = GetScalarizedVector(Op);
618 } else {
619 Op = DAG.getExtractVectorElt(DL, OpEltVT, Op, 0);
620 }
621
622 switch (N->getOpcode()) {
624 return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op);
626 return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op);
628 return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op);
629 }
630
631 llvm_unreachable("Illegal extend_vector_inreg opcode");
632}
633
634SDValue DAGTypeLegalizer::ScalarizeVecRes_ADDRSPACECAST(SDNode *N) {
635 EVT DestVT = N->getValueType(0).getVectorElementType();
636 SDValue Op = N->getOperand(0);
637 EVT OpVT = Op.getValueType();
638 SDLoc DL(N);
639 // The result needs scalarizing, but it's not a given that the source does.
640 // This is a workaround for targets where it's impossible to scalarize the
641 // result of a conversion, because the source type is legal.
642 // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
643 // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
644 // legal and was not scalarized.
645 // See the similar logic in ScalarizeVecRes_SETCC
646 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
647 Op = GetScalarizedVector(Op);
648 } else {
649 EVT VT = OpVT.getVectorElementType();
650 Op = DAG.getExtractVectorElt(DL, VT, Op, 0);
651 }
652 auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N);
653 unsigned SrcAS = AddrSpaceCastN->getSrcAddressSpace();
654 unsigned DestAS = AddrSpaceCastN->getDestAddressSpace();
655 return DAG.getAddrSpaceCast(DL, DestVT, Op, SrcAS, DestAS);
656}
657
658SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
659 // If the operand is wider than the vector element type then it is implicitly
660 // truncated. Make that explicit here.
661 EVT EltVT = N->getValueType(0).getVectorElementType();
662 SDValue InOp = N->getOperand(0);
663 if (InOp.getValueType() != EltVT)
664 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
665 return InOp;
666}
667
668SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
669 SDValue Cond = N->getOperand(0);
670 EVT OpVT = Cond.getValueType();
671 SDLoc DL(N);
672 // The vselect result and true/value operands needs scalarizing, but it's
673 // not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
674 // See the similar logic in ScalarizeVecRes_SETCC
675 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
676 Cond = GetScalarizedVector(Cond);
677 } else {
678 EVT VT = OpVT.getVectorElementType();
679 Cond = DAG.getExtractVectorElt(DL, VT, Cond, 0);
680 }
681
682 SDValue LHS = GetScalarizedVector(N->getOperand(1));
684 TLI.getBooleanContents(false, false);
685 TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);
686
687 // If integer and float booleans have different contents then we can't
688 // reliably optimize in all cases. There is a full explanation for this in
689 // DAGCombiner::visitSELECT() where the same issue affects folding
690 // (select C, 0, 1) to (xor C, 1).
691 if (TLI.getBooleanContents(false, false) !=
692 TLI.getBooleanContents(false, true)) {
693 // At least try the common case where the boolean is generated by a
694 // comparison.
695 if (Cond->getOpcode() == ISD::SETCC) {
696 EVT OpVT = Cond->getOperand(0).getValueType();
697 ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
698 VecBool = TLI.getBooleanContents(OpVT);
699 } else
701 }
702
703 EVT CondVT = Cond.getValueType();
704 if (ScalarBool != VecBool) {
705 switch (ScalarBool) {
707 break;
711 // Vector read from all ones, scalar expects a single 1 so mask.
712 Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT,
713 Cond, DAG.getConstant(1, SDLoc(N), CondVT));
714 break;
718 // Vector reads from a one, scalar from all ones so sign extend.
719 Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT,
720 Cond, DAG.getValueType(MVT::i1));
721 break;
722 }
723 }
724
725 // Truncate the condition if needed
726 auto BoolVT = getSetCCResultType(CondVT);
727 if (BoolVT.bitsLT(CondVT))
728 Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond);
729
730 return DAG.getSelect(SDLoc(N),
731 LHS.getValueType(), Cond, LHS,
732 GetScalarizedVector(N->getOperand(2)));
733}
734
735SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
736 SDValue LHS = GetScalarizedVector(N->getOperand(1));
737 return DAG.getSelect(SDLoc(N),
738 LHS.getValueType(), N->getOperand(0), LHS,
739 GetScalarizedVector(N->getOperand(2)));
740}
741
742SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
743 SDValue LHS = GetScalarizedVector(N->getOperand(2));
744 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(),
745 N->getOperand(0), N->getOperand(1),
746 LHS, GetScalarizedVector(N->getOperand(3)),
747 N->getOperand(4));
748}
749
750SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
751 return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
752}
753
754SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
755 // Figure out if the scalar is the LHS or RHS and return it.
756 SDValue Arg = N->getOperand(2).getOperand(0);
757 if (Arg.isUndef())
758 return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
759 unsigned Op = !cast<ConstantSDNode>(Arg)->isZero();
760 return GetScalarizedVector(N->getOperand(Op));
761}
762
763SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N) {
764 SDValue Src = N->getOperand(0);
765 EVT SrcVT = Src.getValueType();
766 SDLoc dl(N);
767
768 // Handle case where result is scalarized but operand is not
769 if (getTypeAction(SrcVT) == TargetLowering::TypeScalarizeVector)
770 Src = GetScalarizedVector(Src);
771 else
772 Src = DAG.getNode(
774 DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
775
776 EVT DstVT = N->getValueType(0).getVectorElementType();
777 return DAG.getNode(N->getOpcode(), dl, DstVT, Src, N->getOperand(1));
778}
779
780SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
781 assert(N->getValueType(0).isVector() &&
782 N->getOperand(0).getValueType().isVector() &&
783 "Operand types must be vectors");
784 SDValue LHS = N->getOperand(0);
785 SDValue RHS = N->getOperand(1);
786 EVT OpVT = LHS.getValueType();
787 EVT NVT = N->getValueType(0).getVectorElementType();
788 SDLoc DL(N);
789
790 // The result needs scalarizing, but it's not a given that the source does.
791 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
792 LHS = GetScalarizedVector(LHS);
793 RHS = GetScalarizedVector(RHS);
794 } else {
795 EVT VT = OpVT.getVectorElementType();
796 LHS = DAG.getExtractVectorElt(DL, VT, LHS, 0);
797 RHS = DAG.getExtractVectorElt(DL, VT, RHS, 0);
798 }
799
800 // Turn it into a scalar SETCC.
801 SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
802 N->getOperand(2));
803 // Vectors may have a different boolean contents to scalars. Promote the
804 // value appropriately.
805 ISD::NodeType ExtendCode =
806 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
807 return DAG.getNode(ExtendCode, DL, NVT, Res);
808}
809
810SDValue DAGTypeLegalizer::ScalarizeVecRes_IS_FPCLASS(SDNode *N) {
811 SDLoc DL(N);
812 SDValue Arg = N->getOperand(0);
813 SDValue Test = N->getOperand(1);
814 EVT ArgVT = Arg.getValueType();
815 EVT ResultVT = N->getValueType(0).getVectorElementType();
816
817 if (getTypeAction(ArgVT) == TargetLowering::TypeScalarizeVector) {
818 Arg = GetScalarizedVector(Arg);
819 } else {
820 EVT VT = ArgVT.getVectorElementType();
821 Arg = DAG.getExtractVectorElt(DL, VT, Arg, 0);
822 }
823
824 SDValue Res =
825 DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, {Arg, Test}, N->getFlags());
826 // Vectors may have a different boolean contents to scalars. Promote the
827 // value appropriately.
828 ISD::NodeType ExtendCode =
829 TargetLowering::getExtendForContent(TLI.getBooleanContents(ArgVT));
830 return DAG.getNode(ExtendCode, DL, ResultVT, Res);
831}
832
833//===----------------------------------------------------------------------===//
834// Operand Vector Scalarization <1 x ty> -> ty.
835//===----------------------------------------------------------------------===//
836
// Dispatcher for operand scalarization: operand OpNo of N is a <1 x ty>
// vector being scalarized. Routes to a ScalarizeVecOp_* helper; returns
// false when the helper (or this function) already replaced the result(s),
// true when N was updated in place.
// NOTE(review): this listing was recovered from a doxygen render and the
// extraction dropped several original source lines (mostly `case` labels).
// Each gap is flagged inline below; restore the missing lines from upstream
// LLVM before attempting to compile this file.
837bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
838 LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
839 N->dump(&DAG));
840 SDValue Res = SDValue();
841
842 switch (N->getOpcode()) {
843 default:
844#ifndef NDEBUG
845 dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
846 N->dump(&DAG);
847 dbgs() << "\n";
848#endif
849 report_fatal_error("Do not know how to scalarize this operator's "
850 "operand!\n");
851 case ISD::BITCAST:
852 Res = ScalarizeVecOp_BITCAST(N);
853 break;
854 case ISD::FAKE_USE:
855 Res = ScalarizeVecOp_FAKE_USE(N);
856 break;
857 case ISD::ANY_EXTEND:
858 case ISD::ZERO_EXTEND:
859 case ISD::SIGN_EXTEND:
860 case ISD::TRUNCATE:
861 case ISD::FP_TO_SINT:
862 case ISD::FP_TO_UINT:
863 case ISD::SINT_TO_FP:
864 case ISD::UINT_TO_FP:
865 case ISD::LROUND:
866 case ISD::LLROUND:
867 case ISD::LRINT:
868 case ISD::LLRINT:
869 Res = ScalarizeVecOp_UnaryOp(N);
870 break;
// NOTE(review): original lines 871-873 lost — case labels for the
// extra-input unary handler below; confirm against upstream.
874 Res = ScalarizeVecOp_UnaryOpWithExtraInput(N);
875 break;
// NOTE(review): original lines 876-879 lost — STRICT_* unary case labels for
// the handler below; confirm against upstream.
880 Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
881 break;
// NOTE(review): original line 882 lost — presumably
// `case ISD::CONCAT_VECTORS:`; confirm against upstream.
883 Res = ScalarizeVecOp_CONCAT_VECTORS(N);
884 break;
// NOTE(review): original line 885 lost — presumably
// `case ISD::INSERT_SUBVECTOR:`; confirm against upstream.
886 Res = ScalarizeVecOp_INSERT_SUBVECTOR(N, OpNo);
887 break;
// NOTE(review): original line 888 lost — presumably
// `case ISD::EXTRACT_VECTOR_ELT:`; confirm against upstream.
889 Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
890 break;
891 case ISD::VSELECT:
892 Res = ScalarizeVecOp_VSELECT(N);
893 break;
894 case ISD::SETCC:
895 Res = ScalarizeVecOp_VSETCC(N);
896 break;
// NOTE(review): original lines 897-898 lost — presumably the
// STRICT_FSETCC/STRICT_FSETCCS case labels; confirm against upstream.
899 Res = ScalarizeVecOp_VSTRICT_FSETCC(N, OpNo);
900 break;
901 case ISD::STORE:
902 Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
903 break;
// NOTE(review): original line 904 lost — presumably
// `case ISD::STRICT_FP_ROUND:`; confirm against upstream.
905 Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
906 break;
907 case ISD::FP_ROUND:
908 Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
909 break;
// NOTE(review): original line 910 lost — presumably
// `case ISD::STRICT_FP_EXTEND:`; confirm against upstream.
911 Res = ScalarizeVecOp_STRICT_FP_EXTEND(N);
912 break;
913 case ISD::FP_EXTEND:
914 Res = ScalarizeVecOp_FP_EXTEND(N);
915 break;
// NOTE(review): original lines 916-930 lost — the block of
// ISD::VECREDUCE_* case labels for the handler below; confirm against
// upstream.
931 Res = ScalarizeVecOp_VECREDUCE(N);
932 break;
// NOTE(review): original lines 933-934 lost — presumably the
// VECREDUCE_SEQ_FADD/VECREDUCE_SEQ_FMUL case labels; confirm against
// upstream.
935 Res = ScalarizeVecOp_VECREDUCE_SEQ(N);
936 break;
937 case ISD::SCMP:
938 case ISD::UCMP:
939 Res = ScalarizeVecOp_CMP(N);
940 break;
// NOTE(review): original line 941 lost — presumably
// `case ISD::VECTOR_FIND_LAST_ACTIVE:`; confirm against upstream.
942 Res = ScalarizeVecOp_VECTOR_FIND_LAST_ACTIVE(N);
943 break;
944 case ISD::CTTZ_ELTS:
// NOTE(review): original line 945 lost — presumably
// `case ISD::CTTZ_ELTS_ZERO_UNDEF:`; confirm against upstream.
946 Res = ScalarizeVecOp_CTTZ_ELTS(N);
947 break;
948 case ISD::MASKED_UDIV:
949 case ISD::MASKED_SDIV:
950 case ISD::MASKED_UREM:
951 case ISD::MASKED_SREM:
952 Res = ScalarizeVecOp_MaskedBinOp(N, OpNo);
953 break;
954 }
955
956 // If the result is null, the sub-method took care of registering results etc.
957 if (!Res.getNode()) return false;
958
959 // If the result is N, the sub-method updated N in place. Tell the legalizer
960 // core about this.
961 if (Res.getNode() == N)
962 return true;
963
964 assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
965 "Invalid operand expansion");
966
967 ReplaceValueWith(SDValue(N, 0), Res);
968 return false;
969}
970
971/// If the value to convert is a vector that needs to be scalarized, it must be
972/// <1 x ty>. Convert the element instead.
973SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
974 SDValue Elt = GetScalarizedVector(N->getOperand(0));
975 return DAG.getNode(ISD::BITCAST, SDLoc(N),
976 N->getValueType(0), Elt);
977}
978
979// Need to legalize vector operands of fake uses. Must be <1 x ty>.
980SDValue DAGTypeLegalizer::ScalarizeVecOp_FAKE_USE(SDNode *N) {
981 assert(N->getOperand(1).getValueType().getVectorNumElements() == 1 &&
982 "Fake Use: Unexpected vector type!");
983 SDValue Elt = GetScalarizedVector(N->getOperand(1));
984 return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Elt);
985}
986
987/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
988/// Do the operation on the element instead.
989SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
990 assert(N->getValueType(0).getVectorNumElements() == 1 &&
991 "Unexpected vector type!");
992 SDValue Elt = GetScalarizedVector(N->getOperand(0));
993 SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N),
994 N->getValueType(0).getScalarType(), Elt);
995 // Revectorize the result so the types line up with what the uses of this
996 // expression expect.
997 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
998}
999
1000/// Same as ScalarizeVecOp_UnaryOp with an extra operand (for example a
1001/// typesize).
1002SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOpWithExtraInput(SDNode *N) {
1003 assert(N->getValueType(0).getVectorNumElements() == 1 &&
1004 "Unexpected vector type!");
1005 SDValue Elt = GetScalarizedVector(N->getOperand(0));
1006 SDValue Op =
1007 DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0).getScalarType(),
1008 Elt, N->getOperand(1));
1009 // Revectorize the result so the types line up with what the uses of this
1010 // expression expect.
1011 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
1012}
1013
1014/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
1015/// Do the strict FP operation on the element instead.
1016SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) {
1017 assert(N->getValueType(0).getVectorNumElements() == 1 &&
1018 "Unexpected vector type!");
1019 SDValue Elt = GetScalarizedVector(N->getOperand(1));
1020 SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
1021 { N->getValueType(0).getScalarType(), MVT::Other },
1022 { N->getOperand(0), Elt });
1023 // Legalize the chain result - switch anything that used the old chain to
1024 // use the new one.
1025 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
1026 // Revectorize the result so the types line up with what the uses of this
1027 // expression expect.
1028 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
1029
1030 // Do our own replacement and return SDValue() to tell the caller that we
1031 // handled all replacements since caller can only handle a single result.
1032 ReplaceValueWith(SDValue(N, 0), Res);
1033 return SDValue();
1034}
1035
1036/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
1037SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
1038 SmallVector<SDValue, 8> Ops(N->getNumOperands());
1039 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
1040 Ops[i] = GetScalarizedVector(N->getOperand(i));
1041 return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
1042}
1043
1044/// The inserted subvector is to be scalarized - use insert vector element
1045/// instead.
1046SDValue DAGTypeLegalizer::ScalarizeVecOp_INSERT_SUBVECTOR(SDNode *N,
1047 unsigned OpNo) {
1048 // We should not be attempting to scalarize the containing vector
1049 assert(OpNo == 1);
1050 SDValue Elt = GetScalarizedVector(N->getOperand(1));
1051 SDValue ContainingVec = N->getOperand(0);
1052 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
1053 ContainingVec.getValueType(), ContainingVec, Elt,
1054 N->getOperand(2));
1055}
1056
1057/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
1058/// so just return the element, ignoring the index.
1059SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
1060 EVT VT = N->getValueType(0);
1061 SDValue Res = GetScalarizedVector(N->getOperand(0));
1062 if (Res.getValueType() != VT)
1063 Res = VT.isFloatingPoint()
1064 ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res)
1065 : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
1066 return Res;
1067}
1068
1069/// If the input condition is a vector that needs to be scalarized, it must be
1070/// <1 x i1>, so just convert to a normal ISD::SELECT
1071/// (still with vector output type since that was acceptable if we got here).
1072SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
1073 SDValue ScalarCond = GetScalarizedVector(N->getOperand(0));
1074 EVT VT = N->getValueType(0);
1075
1076 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1),
1077 N->getOperand(2));
1078}
1079
1080/// If the operand is a vector that needs to be scalarized then the
1081/// result must be v1i1, so just convert to a scalar SETCC and wrap
1082/// with a scalar_to_vector since the res type is legal if we got here
1083SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
1084 assert(N->getValueType(0).isVector() &&
1085 N->getOperand(0).getValueType().isVector() &&
1086 "Operand types must be vectors");
1087 assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
1088
1089 EVT VT = N->getValueType(0);
1090 SDValue LHS = GetScalarizedVector(N->getOperand(0));
1091 SDValue RHS = GetScalarizedVector(N->getOperand(1));
1092
1093 EVT OpVT = N->getOperand(0).getValueType();
1094 EVT NVT = VT.getVectorElementType();
1095 SDLoc DL(N);
1096 // Turn it into a scalar SETCC.
1097 SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
1098 N->getOperand(2));
1099
1100 // Vectors may have a different boolean contents to scalars. Promote the
1101 // value appropriately.
1102 ISD::NodeType ExtendCode =
1103 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
1104
1105 Res = DAG.getNode(ExtendCode, DL, NVT, Res);
1106
1107 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
1108}
1109
// Similar to ScalarizeVecOp_VSETCC, with added logic to update chains.
SDValue DAGTypeLegalizer::ScalarizeVecOp_VSTRICT_FSETCC(SDNode *N,
                                                        unsigned OpNo) {
  assert(OpNo == 1 && "Wrong operand for scalarization!");
  assert(N->getValueType(0).isVector() &&
         N->getOperand(1).getValueType().isVector() &&
         "Operand types must be vectors");
  assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");

  EVT VT = N->getValueType(0);
  // Operand 0 is the incoming chain; operands 1/2 are the compared vectors
  // and operand 3 is the condition code.
  SDValue Ch = N->getOperand(0);
  SDValue LHS = GetScalarizedVector(N->getOperand(1));
  SDValue RHS = GetScalarizedVector(N->getOperand(2));
  SDValue CC = N->getOperand(3);

  EVT OpVT = N->getOperand(1).getValueType();
  EVT NVT = VT.getVectorElementType();
  SDLoc DL(N);
  // Rebuild the strict compare as a scalar i1 node that still produces a
  // chain result.
  SDValue Res = DAG.getNode(N->getOpcode(), DL, {MVT::i1, MVT::Other},
                            {Ch, LHS, RHS, CC});

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));

  // Vectors may have a different boolean contents to scalars; extend the i1
  // compare result to match the vector element's boolean representation.
  ISD::NodeType ExtendCode =
      TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));

  Res = DAG.getNode(ExtendCode, DL, NVT, Res);
  Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);

  // Do our own replacement and return SDValue() to tell the caller that we
  // handled all replacements since caller can only handle a single result.
  ReplaceValueWith(SDValue(N, 0), Res);
  return SDValue();
}
1146
1147/// If the value to store is a vector that needs to be scalarized, it must be
1148/// <1 x ty>. Just store the element.
1149SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
1150 assert(N->isUnindexed() && "Indexed store of one-element vector?");
1151 assert(OpNo == 1 && "Do not know how to scalarize this operand!");
1152 SDLoc dl(N);
1153
1154 if (N->isTruncatingStore())
1155 return DAG.getTruncStore(
1156 N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
1157 N->getBasePtr(), N->getPointerInfo(),
1158 N->getMemoryVT().getVectorElementType(), N->getBaseAlign(),
1159 N->getMemOperand()->getFlags(), N->getAAInfo());
1160
1161 return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
1162 N->getBasePtr(), N->getPointerInfo(), N->getBaseAlign(),
1163 N->getMemOperand()->getFlags(), N->getAAInfo());
1164}
1165
1166/// If the value to round is a vector that needs to be scalarized, it must be
1167/// <1 x ty>. Convert the element instead.
1168SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
1169 assert(OpNo == 0 && "Wrong operand for scalarization!");
1170 SDValue Elt = GetScalarizedVector(N->getOperand(0));
1171 SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
1172 N->getValueType(0).getVectorElementType(), Elt,
1173 N->getOperand(1));
1174 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
1175}
1176
/// Strict-FP variant of ScalarizeVecOp_FP_ROUND: round the single element and
/// take care of the chain result ourselves.
SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
                                                         unsigned OpNo) {
  assert(OpNo == 1 && "Wrong operand for scalarization!");
  SDValue Elt = GetScalarizedVector(N->getOperand(1));
  // Operand 0 is the chain and operand 2 the trunc flag; both carry over.
  SDValue Res =
      DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
                  {N->getValueType(0).getVectorElementType(), MVT::Other},
                  {N->getOperand(0), Elt, N->getOperand(2)});
  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));

  // Re-wrap the scalar result as a one-element vector.
  Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);

  // Do our own replacement and return SDValue() to tell the caller that we
  // handled all replacements since caller can only handle a single result.
  ReplaceValueWith(SDValue(N, 0), Res);
  return SDValue();
}
1196
1197/// If the value to extend is a vector that needs to be scalarized, it must be
1198/// <1 x ty>. Convert the element instead.
1199SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_EXTEND(SDNode *N) {
1200 SDValue Elt = GetScalarizedVector(N->getOperand(0));
1201 SDValue Res = DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
1202 N->getValueType(0).getVectorElementType(), Elt);
1203 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
1204}
1205
/// If the value to extend is a vector that needs to be scalarized, it must be
/// <1 x ty>. Convert the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N) {
  // Operand 0 is the chain; operand 1 is the <1 x ty> value to extend.
  SDValue Elt = GetScalarizedVector(N->getOperand(1));
  SDValue Res =
      DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N),
                  {N->getValueType(0).getVectorElementType(), MVT::Other},
                  {N->getOperand(0), Elt});
  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));

  // Re-wrap the scalar result as a one-element vector.
  Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);

  // Do our own replacement and return SDValue() to tell the caller that we
  // handled all replacements since caller can only handle a single result.
  ReplaceValueWith(SDValue(N, 0), Res);
  return SDValue();
}
1225
1226SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
1227 SDValue Res = GetScalarizedVector(N->getOperand(0));
1228 // Result type may be wider than element type.
1229 if (Res.getValueType() != N->getValueType(0))
1230 Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res);
1231 return Res;
1232}
1233
1234SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N) {
1235 SDValue AccOp = N->getOperand(0);
1236 SDValue VecOp = N->getOperand(1);
1237
1238 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(N->getOpcode());
1239
1240 SDValue Op = GetScalarizedVector(VecOp);
1241 return DAG.getNode(BaseOpc, SDLoc(N), N->getValueType(0),
1242 AccOp, Op, N->getFlags());
1243}
1244
1245SDValue DAGTypeLegalizer::ScalarizeVecOp_CMP(SDNode *N) {
1246 SDValue LHS = GetScalarizedVector(N->getOperand(0));
1247 SDValue RHS = GetScalarizedVector(N->getOperand(1));
1248
1249 EVT ResVT = N->getValueType(0).getVectorElementType();
1250 SDValue Cmp = DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, LHS, RHS);
1251 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Cmp);
1252}
1253
1254SDValue DAGTypeLegalizer::ScalarizeVecOp_VECTOR_FIND_LAST_ACTIVE(SDNode *N) {
1255 // Since there is no "none-active" result, the only valid return for <1 x ty>
1256 // is 0. Note: Since we check the high mask during splitting this is safe.
1257 // As e.g., a <2 x ty> operation would split to:
1258 // any_active(%hi_mask) ? (1 + last_active(%hi_mask))
1259 // : `last_active(%lo_mask)`
1260 // Which then scalarizes to:
1261 // %mask[1] ? 1 : 0
1262 EVT VT = N->getValueType(0);
1263 return DAG.getConstant(0, SDLoc(N), VT);
1264}
1265
1266SDValue DAGTypeLegalizer::ScalarizeVecOp_CTTZ_ELTS(SDNode *N) {
1267 // The number of trailing zero elements is 1 if the element is 0, and 0
1268 // otherwise.
1269 if (N->getOpcode() == ISD::CTTZ_ELTS_ZERO_POISON)
1270 return DAG.getConstant(0, SDLoc(N), N->getValueType(0));
1271 SDValue Op = GetScalarizedVector(N->getOperand(0));
1272 SDValue SetCC =
1273 DAG.getSetCC(SDLoc(N), MVT::i1, Op,
1274 DAG.getConstant(0, SDLoc(N), Op.getValueType()), ISD::SETEQ);
1275 return DAG.getZExtOrTrunc(SetCC, SDLoc(N), N->getValueType(0));
1276}
1277
/// Scalarize the <1 x i1> mask operand of a masked binary divide/remainder
/// node; the two data operands are reduced by direct element extraction.
SDValue DAGTypeLegalizer::ScalarizeVecOp_MaskedBinOp(SDNode *N, unsigned OpNo) {
  assert(OpNo == 2 && "Can only scalarize mask operand");
  SDLoc DL(N);
  EVT VT = N->getOperand(0).getValueType().getVectorElementType();
  // Pull the single data lane out of each vector operand.
  SDValue LHS = DAG.getExtractVectorElt(DL, VT, N->getOperand(0), 0);
  SDValue RHS = DAG.getExtractVectorElt(DL, VT, N->getOperand(1), 0);
  SDValue Mask = GetScalarizedVector(N->getOperand(2));
  // Vectors may have a different boolean contents to scalars, so truncate to i1
  // and let type legalization promote appropriately.
  Mask = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Mask);
  // Masked binary ops don't have UB on disabled lanes but produce poison, so
  // use 1 as the divisor to avoid division by zero and overflow.
  SDValue BinOp =
      DAG.getNode(ISD::getUnmaskedBinOpOpcode(N->getOpcode()), DL, VT, LHS,
                  DAG.getSelect(DL, VT, Mask, RHS, DAG.getConstant(1, DL, VT)));
  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, N->getValueType(0), BinOp);
}
1295
1296//===----------------------------------------------------------------------===//
1297// Result Vector Splitting
1298//===----------------------------------------------------------------------===//
1299
/// This method is called when the specified result of the specified node is
/// found to need vector splitting. At this point, the node may also have
/// invalid operands or may have other results that need legalization, we just
/// know that (at least) one result needs vector splitting.
///
/// NOTE(review): several `case ISD::...:` labels inside this switch appear to
/// have been lost during extraction (bodies follow a `break;` with no label);
/// restore them from the upstream file before building.
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
  LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG));
  SDValue Lo, Hi;

  // See if the target wants to custom expand this node.
  if (CustomLowerNode(N, N->getValueType(ResNo), true))
    return;

  switch (N->getOpcode()) {
  default:
#ifndef NDEBUG
    dbgs() << "SplitVectorResult #" << ResNo << ": ";
    N->dump(&DAG);
    dbgs() << "\n";
#endif
    report_fatal_error("Do not know how to split the result of this "
                       "operator!\n");

  // NOTE(review): case label(s) lost in extraction.
    SplitVecRes_LOOP_DEPENDENCE_MASK(N, Lo, Hi);
    break;
  case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
  case ISD::AssertZext: SplitVecRes_AssertZext(N, Lo, Hi); break;
  case ISD::AssertSext: SplitVecRes_AssertSext(N, Lo, Hi); break;
  case ISD::VSELECT:
  case ISD::SELECT:
  case ISD::VP_MERGE:
  case ISD::VP_SELECT: SplitRes_Select(N, Lo, Hi); break;
  case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
  case ISD::POISON:
  case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
  case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
  case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
  case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
  case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
  case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
  case ISD::FPOWI:
  case ISD::FLDEXP:
  case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break;
  case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break;
  case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
  case ISD::SPLAT_VECTOR:
  // NOTE(review): a further case label appears lost in extraction here.
    SplitVecRes_ScalarOp(N, Lo, Hi);
    break;
  case ISD::STEP_VECTOR:
    SplitVecRes_STEP_VECTOR(N, Lo, Hi);
    break;
  case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
  case ISD::ATOMIC_LOAD:
    SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi);
    break;
  case ISD::LOAD:
    SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
    break;
  case ISD::VP_LOAD:
    SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi);
    break;
  case ISD::VP_LOAD_FF:
    SplitVecRes_VP_LOAD_FF(cast<VPLoadFFSDNode>(N), Lo, Hi);
    break;
  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
    SplitVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N), Lo, Hi);
    break;
  case ISD::MLOAD:
    SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
    break;
  case ISD::MGATHER:
  case ISD::VP_GATHER:
    SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true);
    break;
  // NOTE(review): case label(s) lost in extraction.
    SplitVecRes_VECTOR_COMPRESS(N, Lo, Hi);
    break;
  case ISD::SETCC:
  case ISD::VP_SETCC:
    SplitVecRes_SETCC(N, Lo, Hi);
    break;
  // NOTE(review): case label(s) lost in extraction.
    SplitVecRes_VECTOR_REVERSE(N, Lo, Hi);
    break;
  // NOTE(review): case label(s) lost in extraction.
    SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
    break;
  // NOTE(review): case label(s) lost in extraction.
    SplitVecRes_VECTOR_SPLICE(N, Lo, Hi);
    break;
  // NOTE(review): case label(s) lost in extraction.
    SplitVecRes_VECTOR_DEINTERLEAVE(N);
    return;
  // NOTE(review): case label(s) lost in extraction.
    SplitVecRes_VECTOR_INTERLEAVE(N);
    return;
  case ISD::VAARG:
    SplitVecRes_VAARG(N, Lo, Hi);
    break;

  // NOTE(review): case label(s) lost in extraction.
    SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
    break;

  case ISD::ABS:
  case ISD::VP_ABS:
  case ISD::BITREVERSE:
  case ISD::VP_BITREVERSE:
  case ISD::BSWAP:
  case ISD::VP_BSWAP:
  case ISD::CTLZ:
  case ISD::VP_CTLZ:
  case ISD::CTTZ:
  case ISD::VP_CTTZ:
  case ISD::VP_CTLZ_ZERO_POISON:
  case ISD::VP_CTTZ_ZERO_POISON:
  case ISD::CTPOP:
  case ISD::VP_CTPOP:
  case ISD::FABS: case ISD::VP_FABS:
  case ISD::FACOS:
  case ISD::FASIN:
  case ISD::FATAN:
  case ISD::FCEIL:
  case ISD::VP_FCEIL:
  case ISD::FCOS:
  case ISD::FCOSH:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FEXP10:
  case ISD::FFLOOR:
  case ISD::VP_FFLOOR:
  case ISD::FLOG:
  case ISD::FLOG10:
  case ISD::FLOG2:
  case ISD::FNEARBYINT:
  case ISD::VP_FNEARBYINT:
  case ISD::FNEG: case ISD::VP_FNEG:
  case ISD::FREEZE:
  case ISD::ARITH_FENCE:
  case ISD::FP_EXTEND:
  case ISD::VP_FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::VP_FP_ROUND:
  case ISD::FP_TO_SINT:
  case ISD::VP_FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::VP_FP_TO_UINT:
  case ISD::FRINT:
  case ISD::VP_FRINT:
  case ISD::LRINT:
  case ISD::VP_LRINT:
  case ISD::LLRINT:
  case ISD::VP_LLRINT:
  case ISD::FROUND:
  case ISD::VP_FROUND:
  case ISD::FROUNDEVEN:
  case ISD::VP_FROUNDEVEN:
  case ISD::LROUND:
  case ISD::LLROUND:
  case ISD::FSIN:
  case ISD::FSINH:
  case ISD::FSQRT: case ISD::VP_SQRT:
  case ISD::FTAN:
  case ISD::FTANH:
  case ISD::FTRUNC:
  case ISD::VP_FROUNDTOZERO:
  case ISD::SINT_TO_FP:
  case ISD::VP_SINT_TO_FP:
  case ISD::TRUNCATE:
  case ISD::VP_TRUNCATE:
  case ISD::UINT_TO_FP:
  case ISD::VP_UINT_TO_FP:
  case ISD::FCANONICALIZE:
  // NOTE(review): further case label(s) appear lost in extraction here.
    SplitVecRes_UnaryOp(N, Lo, Hi);
    break;
  case ISD::ADDRSPACECAST:
    SplitVecRes_ADDRSPACECAST(N, Lo, Hi);
    break;
  case ISD::FMODF:
  case ISD::FFREXP:
  case ISD::FSINCOS:
  case ISD::FSINCOSPI:
    SplitVecRes_UnaryOpWithTwoResults(N, ResNo, Lo, Hi);
    break;

  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::VP_SIGN_EXTEND:
  case ISD::VP_ZERO_EXTEND:
    SplitVecRes_ExtendOp(N, Lo, Hi);
    break;

  case ISD::ADD: case ISD::VP_ADD:
  case ISD::SUB: case ISD::VP_SUB:
  case ISD::MUL: case ISD::VP_MUL:
  case ISD::CLMUL:
  case ISD::CLMULR:
  case ISD::CLMULH:
  case ISD::MULHS:
  case ISD::MULHU:
  case ISD::ABDS:
  case ISD::ABDU:
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::FADD: case ISD::VP_FADD:
  case ISD::FSUB: case ISD::VP_FSUB:
  case ISD::FMUL: case ISD::VP_FMUL:
  case ISD::FMINNUM:
  case ISD::FMINNUM_IEEE:
  case ISD::VP_FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMAXNUM_IEEE:
  case ISD::VP_FMAXNUM:
  case ISD::FMINIMUM:
  case ISD::VP_FMINIMUM:
  case ISD::FMAXIMUM:
  case ISD::VP_FMAXIMUM:
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
  case ISD::SDIV: case ISD::VP_SDIV:
  case ISD::UDIV: case ISD::VP_UDIV:
  case ISD::FDIV: case ISD::VP_FDIV:
  case ISD::FPOW:
  case ISD::FATAN2:
  case ISD::AND: case ISD::VP_AND:
  case ISD::OR: case ISD::VP_OR:
  case ISD::XOR: case ISD::VP_XOR:
  case ISD::SHL: case ISD::VP_SHL:
  case ISD::SRA: case ISD::VP_SRA:
  case ISD::SRL: case ISD::VP_SRL:
  case ISD::UREM: case ISD::VP_UREM:
  case ISD::SREM: case ISD::VP_SREM:
  case ISD::FREM: case ISD::VP_FREM:
  case ISD::SMIN: case ISD::VP_SMIN:
  case ISD::SMAX: case ISD::VP_SMAX:
  case ISD::UMIN: case ISD::VP_UMIN:
  case ISD::UMAX: case ISD::VP_UMAX:
  case ISD::SADDSAT: case ISD::VP_SADDSAT:
  case ISD::UADDSAT: case ISD::VP_UADDSAT:
  case ISD::SSUBSAT: case ISD::VP_SSUBSAT:
  case ISD::USUBSAT: case ISD::VP_USUBSAT:
  case ISD::SSHLSAT:
  case ISD::USHLSAT:
  case ISD::ROTL:
  case ISD::ROTR:
  case ISD::VP_FCOPYSIGN:
    SplitVecRes_BinOp(N, Lo, Hi);
    break;
  case ISD::MASKED_UDIV:
  case ISD::MASKED_SDIV:
  case ISD::MASKED_UREM:
  case ISD::MASKED_SREM:
    SplitVecRes_MaskedBinOp(N, Lo, Hi);
    break;
  case ISD::FMA: case ISD::VP_FMA:
  case ISD::FSHL:
  case ISD::VP_FSHL:
  case ISD::FSHR:
  case ISD::VP_FSHR:
    SplitVecRes_TernaryOp(N, Lo, Hi);
    break;

  case ISD::SCMP: case ISD::UCMP:
    SplitVecRes_CMP(N, Lo, Hi);
    break;

#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    SplitVecRes_StrictFPOp(N, Lo, Hi);
    break;

  // NOTE(review): case label(s) lost in extraction.
    SplitVecRes_FP_TO_XINT_SAT(N, Lo, Hi);
    break;

  case ISD::UADDO:
  case ISD::SADDO:
  case ISD::USUBO:
  case ISD::SSUBO:
  case ISD::UMULO:
  case ISD::SMULO:
    SplitVecRes_OverflowOp(N, ResNo, Lo, Hi);
    break;
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:
  case ISD::SDIVFIX:
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIX:
  case ISD::UDIVFIXSAT:
    SplitVecRes_FIX(N, Lo, Hi);
    break;
  case ISD::EXPERIMENTAL_VP_SPLICE:
    SplitVecRes_VP_SPLICE(N, Lo, Hi);
    break;
  case ISD::EXPERIMENTAL_VP_REVERSE:
    SplitVecRes_VP_REVERSE(N, Lo, Hi);
    break;
  // NOTE(review): case label(s) lost in extraction.
    SplitVecRes_PARTIAL_REDUCE_MLA(N, Lo, Hi);
    break;
  // NOTE(review): case label(s) lost in extraction.
    SplitVecRes_GET_ACTIVE_LANE_MASK(N, Lo, Hi);
    break;
  }

  // If Lo/Hi is null, the sub-method took care of registering results etc.
  if (Lo.getNode())
    SetSplitVector(SDValue(N, ResNo), Lo, Hi);
}
1629
/// Advance \p Ptr (and its MachinePointerInfo \p MPI) past the low half of a
/// split memory access of type \p MemVT. For scalable vectors the byte offset
/// is multiplied by vscale and, if \p ScaledOffset is non-null, the unscaled
/// increment is accumulated there.
void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
                                        MachinePointerInfo &MPI, SDValue &Ptr,
                                        uint64_t *ScaledOffset) {
  SDLoc DL(N);
  unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinValue() / 8;

  if (MemVT.isScalableVector()) {
    SDValue BytesIncrement = DAG.getVScale(
        DL, Ptr.getValueType(),
        APInt(Ptr.getValueSizeInBits().getFixedValue(), IncrementSize));
    // The runtime offset is not statically known, so fall back to pointer
    // info that only records the address space.
    MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
    if (ScaledOffset)
      *ScaledOffset += IncrementSize;
    // NOTE(review): the trailing argument of this getNode call (node flags)
    // appears to have been lost in extraction — restore from upstream.
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement,
  } else {
    MPI = N->getPointerInfo().getWithOffset(IncrementSize);
    // Increment the pointer to the other half.
    Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::getFixed(IncrementSize));
  }
}
1651
1652std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask) {
1653 return SplitMask(Mask, SDLoc(Mask));
1654}
1655
1656std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask,
1657 const SDLoc &DL) {
1658 SDValue MaskLo, MaskHi;
1659 EVT MaskVT = Mask.getValueType();
1660 if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
1661 GetSplitVector(Mask, MaskLo, MaskHi);
1662 else
1663 std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
1664 return std::make_pair(MaskLo, MaskHi);
1665}
1666
1667void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) {
1668 SDValue LHSLo, LHSHi;
1669 GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
1670 SDValue RHSLo, RHSHi;
1671 GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
1672 SDLoc dl(N);
1673
1674 const SDNodeFlags Flags = N->getFlags();
1675 unsigned Opcode = N->getOpcode();
1676 if (N->getNumOperands() == 2) {
1677 Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
1678 Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
1679 return;
1680 }
1681
1682 assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
1683 assert(N->isVPOpcode() && "Expected VP opcode");
1684
1685 SDValue MaskLo, MaskHi;
1686 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2));
1687
1688 SDValue EVLLo, EVLHi;
1689 std::tie(EVLLo, EVLHi) =
1690 DAG.SplitEVL(N->getOperand(3), N->getValueType(0), dl);
1691
1692 Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(),
1693 {LHSLo, RHSLo, MaskLo, EVLLo}, Flags);
1694 Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(),
1695 {LHSHi, RHSHi, MaskHi, EVLHi}, Flags);
1696}
1697
1698void DAGTypeLegalizer::SplitVecRes_MaskedBinOp(SDNode *N, SDValue &Lo,
1699 SDValue &Hi) {
1700 SDValue LHSLo, LHSHi;
1701 GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
1702 SDValue RHSLo, RHSHi;
1703 GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
1704 auto [MaskLo, MaskHi] = SplitMask(N->getOperand(2));
1705 SDLoc dl(N);
1706
1707 const SDNodeFlags Flags = N->getFlags();
1708 unsigned Opcode = N->getOpcode();
1709 Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, MaskLo,
1710 Flags);
1711 Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, MaskHi,
1712 Flags);
1713}
1714
1715void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
1716 SDValue &Hi) {
1717 SDValue Op0Lo, Op0Hi;
1718 GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
1719 SDValue Op1Lo, Op1Hi;
1720 GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
1721 SDValue Op2Lo, Op2Hi;
1722 GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
1723 SDLoc dl(N);
1724
1725 const SDNodeFlags Flags = N->getFlags();
1726 unsigned Opcode = N->getOpcode();
1727 if (N->getNumOperands() == 3) {
1728 Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Op2Lo, Flags);
1729 Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, Op2Hi, Flags);
1730 return;
1731 }
1732
1733 assert(N->getNumOperands() == 5 && "Unexpected number of operands!");
1734 assert(N->isVPOpcode() && "Expected VP opcode");
1735
1736 SDValue MaskLo, MaskHi;
1737 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
1738
1739 SDValue EVLLo, EVLHi;
1740 std::tie(EVLLo, EVLHi) =
1741 DAG.SplitEVL(N->getOperand(4), N->getValueType(0), dl);
1742
1743 Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(),
1744 {Op0Lo, Op1Lo, Op2Lo, MaskLo, EVLLo}, Flags);
1745 Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(),
1746 {Op0Hi, Op1Hi, Op2Hi, MaskHi, EVLHi}, Flags);
1747}
1748
1749void DAGTypeLegalizer::SplitVecRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi) {
1750 LLVMContext &Ctxt = *DAG.getContext();
1751 SDLoc dl(N);
1752
1753 SDValue LHS = N->getOperand(0);
1754 SDValue RHS = N->getOperand(1);
1755
1756 SDValue LHSLo, LHSHi, RHSLo, RHSHi;
1757 if (getTypeAction(LHS.getValueType()) == TargetLowering::TypeSplitVector) {
1758 GetSplitVector(LHS, LHSLo, LHSHi);
1759 GetSplitVector(RHS, RHSLo, RHSHi);
1760 } else {
1761 std::tie(LHSLo, LHSHi) = DAG.SplitVector(LHS, dl);
1762 std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, dl);
1763 }
1764
1765 EVT SplitResVT = N->getValueType(0).getHalfNumVectorElementsVT(Ctxt);
1766 Lo = DAG.getNode(N->getOpcode(), dl, SplitResVT, LHSLo, RHSLo);
1767 Hi = DAG.getNode(N->getOpcode(), dl, SplitResVT, LHSHi, RHSHi);
1768}
1769
1770void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
1771 SDValue LHSLo, LHSHi;
1772 GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
1773 SDValue RHSLo, RHSHi;
1774 GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
1775 SDLoc dl(N);
1776 SDValue Op2 = N->getOperand(2);
1777
1778 unsigned Opcode = N->getOpcode();
1779 Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2,
1780 N->getFlags());
1781 Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2,
1782 N->getFlags());
1783}
1784
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
                                           SDValue &Hi) {
  // We know the result is a vector. The input may be either a vector or a
  // scalar value.
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
  SDLoc dl(N);

  SDValue InOp = N->getOperand(0);
  EVT InVT = InOp.getValueType();

  // Handle some special cases efficiently.
  switch (getTypeAction(InVT)) {
  // NOTE(review): the case labels of this switch appear to have been lost in
  // extraction — restore them from upstream before building.
    break;
    // A scalar to vector conversion, where the scalar needs expansion.
    // If the vector is being split in two then we can just convert the
    // expanded pieces.
    if (LoVT == HiVT) {
      GetExpandedOp(InOp, Lo, Hi);
      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);
      Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
      Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
      return;
    }
    break;
    // If the input is a vector that needs to be split, convert each split
    // piece of the input now.
    GetSplitVector(InOp, Lo, Hi);
    Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
    Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
    return;
    report_fatal_error("Scalarization of scalable vectors is not supported.");
  }

  // A scalable result with a fixed-or-split input: split the operand and
  // bitcast each piece.
  if (LoVT.isScalableVector()) {
    auto [InLo, InHi] = DAG.SplitVectorOperand(N, 0);
    Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, InLo);
    Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, InHi);
    return;
  }

  // In the general case, convert the input to an integer and split it by hand.
  EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
  EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LoIntVT, HiIntVT);

  SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);

  if (DAG.getDataLayout().isBigEndian())
    std::swap(Lo, Hi);
  Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
  Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
}
1850
void DAGTypeLegalizer::SplitVecRes_LOOP_DEPENDENCE_MASK(SDNode *N, SDValue &Lo,
                                                        SDValue &Hi) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  SDValue PtrA = N->getOperand(0);
  SDValue PtrB = N->getOperand(1);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // The lane offset for the "Lo" half of the mask is unchanged.
  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, PtrA, PtrB,
                   /*ElementSizeInBytes=*/N->getOperand(2),
                   /*LaneOffset=*/N->getOperand(3));
  // The lane offset for the "Hi" half of the mask is incremented by the number
  // of elements in the "Lo" half.
  // NOTE(review): the initializer of LaneOffset appears to have been lost in
  // extraction — restore it from upstream before building.
  unsigned LaneOffset =
  // Note: The lane offset is implicitly scalable for scalable masks.
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, PtrA, PtrB,
                   /*ElementSizeInBytes=*/N->getOperand(2),
                   /*LaneOffset=*/DAG.getConstant(LaneOffset, DL, MVT::i64));
}
1872
1873void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
1874 SDValue &Hi) {
1875 EVT LoVT, HiVT;
1876 SDLoc dl(N);
1877 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
1878 unsigned LoNumElts = LoVT.getVectorNumElements();
1879 SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
1880 Lo = DAG.getBuildVector(LoVT, dl, LoOps);
1881
1882 SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
1883 Hi = DAG.getBuildVector(HiVT, dl, HiOps);
1884}
1885
1886void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
1887 SDValue &Hi) {
1888 assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
1889 SDLoc dl(N);
1890 unsigned NumSubvectors = N->getNumOperands() / 2;
1891 if (NumSubvectors == 1) {
1892 Lo = N->getOperand(0);
1893 Hi = N->getOperand(1);
1894 return;
1895 }
1896
1897 EVT LoVT, HiVT;
1898 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
1899
1900 SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
1901 Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps);
1902
1903 SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
1904 Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps);
1905}
1906
1907void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
1908 SDValue &Hi) {
1909 SDValue Vec = N->getOperand(0);
1910 SDValue Idx = N->getOperand(1);
1911 SDLoc dl(N);
1912
1913 EVT LoVT, HiVT;
1914 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
1915
1916 Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
1917 uint64_t IdxVal = Idx->getAsZExtVal();
1918 Hi = DAG.getNode(
1919 ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
1920 DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorMinNumElements(), dl));
1921}
1922
void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
                                                    SDValue &Hi) {
  // Split the result of INSERT_SUBVECTOR. Fast paths insert directly into a
  // single half when the subvector provably lies entirely within it;
  // otherwise the whole vector is spilled to a stack slot, the subvector is
  // stored over it, and the two halves are reloaded separately.
  SDValue Vec = N->getOperand(0);
  SDValue SubVec = N->getOperand(1);
  SDValue Idx = N->getOperand(2);
  SDLoc dl(N);
  GetSplitVector(Vec, Lo, Hi);

  EVT VecVT = Vec.getValueType();
  EVT LoVT = Lo.getValueType();
  EVT SubVecVT = SubVec.getValueType();
  unsigned VecElems = VecVT.getVectorMinNumElements();
  unsigned SubElems = SubVecVT.getVectorMinNumElements();
  unsigned LoElems = LoVT.getVectorMinNumElements();

  // If we know the index is in the first half, and we know the subvector
  // doesn't cross the boundary between the halves, we can avoid spilling the
  // vector, and insert into the lower half of the split vector directly.
  unsigned IdxVal = Idx->getAsZExtVal();
  if (IdxVal + SubElems <= LoElems) {
    Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
    return;
  }
  // Similarly if the subvector is fully in the high half, but mind that we
  // can't tell whether a fixed-length subvector is fully within the high half
  // of a scalable vector.
  if (VecVT.isScalableVector() == SubVecVT.isScalableVector() &&
      IdxVal >= LoElems && IdxVal + SubElems <= VecElems) {
    Hi = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, Hi.getValueType(), Hi, SubVec,
                     DAG.getVectorIdxConstant(IdxVal - LoElems, dl));
    return;
  }

  // Inserting an i1 subvector that needs widening into an undef vector: if
  // the widened subvector already has the full vector type, splitting the
  // widened value directly gives both halves.
  if (getTypeAction(SubVecVT) == TargetLowering::TypeWidenVector &&
      Vec.isUndef() && SubVecVT.getVectorElementType() == MVT::i1) {
    SDValue WideSubVec = GetWidenedVector(SubVec);
    if (WideSubVec.getValueType() == VecVT) {
      std::tie(Lo, Hi) = DAG.SplitVector(WideSubVec, SDLoc(WideSubVec));
      return;
    }
  }

  // Spill the vector to the stack.
  // In cases where the vector is illegal it will be broken down into parts
  // and stored in parts - we should use the alignment for the smallest part.
  Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
      DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
                               SmallestAlign);

  // Store the new subvector into the specified index.
  SDValue SubVecPtr =
      TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx);
  Store = DAG.getStore(Store, dl, SubVec, SubVecPtr,

  // Load the Lo part from the stack slot.
  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, PtrInfo,
                   SmallestAlign);

  // Increment the pointer to the other part.
  auto *Load = cast<LoadSDNode>(Lo);
  MachinePointerInfo MPI = Load->getPointerInfo();
  IncrementPointer(Load, LoVT, MPI, StackPtr);

  // Load the Hi part from the stack slot.
  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MPI, SmallestAlign);
}
1996
// Handle splitting an FP operation where the type of the second operand does
// not match the type of the first. The second operand may be a scalar, or a
// vector that has exactly as many elements as the first operand.
2000void DAGTypeLegalizer::SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo,
2001 SDValue &Hi) {
2002 SDValue LHSLo, LHSHi;
2003 GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
2004 SDLoc DL(N);
2005
2006 SDValue RHSLo, RHSHi;
2007 SDValue RHS = N->getOperand(1);
2008 EVT RHSVT = RHS.getValueType();
2009 if (RHSVT.isVector()) {
2010 if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
2011 GetSplitVector(RHS, RHSLo, RHSHi);
2012 else
2013 std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
2014
2015 Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHSLo);
2016 Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHSHi);
2017 } else {
2018 Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHS);
2019 Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHS);
2020 }
2021}
2022
2023void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo,
2024 SDValue &Hi) {
2025 SDLoc DL(N);
2026 SDValue ArgLo, ArgHi;
2027 SDValue Test = N->getOperand(1);
2028 SDValue FpValue = N->getOperand(0);
2029 if (getTypeAction(FpValue.getValueType()) == TargetLowering::TypeSplitVector)
2030 GetSplitVector(FpValue, ArgLo, ArgHi);
2031 else
2032 std::tie(ArgLo, ArgHi) = DAG.SplitVector(FpValue, SDLoc(FpValue));
2033 EVT LoVT, HiVT;
2034 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
2035
2036 Lo = DAG.getNode(ISD::IS_FPCLASS, DL, LoVT, ArgLo, Test, N->getFlags());
2037 Hi = DAG.getNode(ISD::IS_FPCLASS, DL, HiVT, ArgHi, Test, N->getFlags());
2038}
2039
2040void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
2041 SDValue &Hi) {
2042 SDValue LHSLo, LHSHi;
2043 GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
2044 SDLoc dl(N);
2045
2046 EVT LoVT, HiVT;
2047 std::tie(LoVT, HiVT) =
2048 DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT());
2049
2050 Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
2051 DAG.getValueType(LoVT));
2052 Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
2053 DAG.getValueType(HiVT));
2054}
2055
2056void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
2057 SDValue &Hi) {
2058 unsigned Opcode = N->getOpcode();
2059 SDValue N0 = N->getOperand(0);
2060
2061 SDLoc dl(N);
2062 SDValue InLo, InHi;
2063
2064 if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector)
2065 GetSplitVector(N0, InLo, InHi);
2066 else
2067 std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0);
2068
2069 EVT InLoVT = InLo.getValueType();
2070 unsigned InNumElements = InLoVT.getVectorNumElements();
2071
2072 EVT OutLoVT, OutHiVT;
2073 std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
2074 unsigned OutNumElements = OutLoVT.getVectorNumElements();
2075 assert((2 * OutNumElements) <= InNumElements &&
2076 "Illegal extend vector in reg split");
2077
2078 // *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the
2079 // input vector (i.e. we only use InLo):
2080 // OutLo will extend the first OutNumElements from InLo.
2081 // OutHi will extend the next OutNumElements from InLo.
2082
2083 // Shuffle the elements from InLo for OutHi into the bottom elements to
2084 // create a 'fake' InHi.
2085 SmallVector<int, 8> SplitHi(InNumElements, -1);
2086 for (unsigned i = 0; i != OutNumElements; ++i)
2087 SplitHi[i] = i + OutNumElements;
2088 InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getPOISON(InLoVT), SplitHi);
2089
2090 Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo);
2091 Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi);
2092}
2093
void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
                                              SDValue &Hi) {
  // Split a strict (chained) FP operation: each vector operand is split, the
  // incoming chain is shared by both halves, and the two result chains are
  // rejoined with a TokenFactor.
  unsigned NumOps = N->getNumOperands();
  SDValue Chain = N->getOperand(0);
  EVT LoVT, HiVT;
  SDLoc dl(N);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));


  // The Chain is the first operand.
  OpsLo[0] = Chain;
  OpsHi[0] = Chain;

  // Now process the remaining operands.
  for (unsigned i = 1; i < NumOps; ++i) {
    SDValue Op = N->getOperand(i);
    // Non-vector operands are passed through unchanged to both halves.
    SDValue OpLo = Op;
    SDValue OpHi = Op;

    EVT InVT = Op.getValueType();
    if (InVT.isVector()) {
      // If the input also splits, handle it directly for a
      // compile time speedup. Otherwise split it by hand.
      if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
        GetSplitVector(Op, OpLo, OpHi);
      else
        std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i);
    }

    OpsLo[i] = OpLo;
    OpsHi[i] = OpHi;
  }

  // Each half produces its value result plus an output chain.
  EVT LoValueVTs[] = {LoVT, MVT::Other};
  EVT HiValueVTs[] = {HiVT, MVT::Other};
  Lo = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(LoValueVTs), OpsLo,
                   N->getFlags());
  Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(HiValueVTs), OpsHi,
                   N->getFlags());

  // Build a factor node to remember that this Op is independent of the
  // other one.
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Lo.getValue(1), Hi.getValue(1));

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(N, 1), Chain);
}
2145
SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
  // Unroll a strict FP vector op into at most ResNE scalar operations (0
  // means "use the input element count"), returning a BUILD_VECTOR of the
  // scalar results; the per-element chains are rejoined with a TokenFactor
  // that replaces the node's chain result.
  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  unsigned NE = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc dl(N);

  SmallVector<SDValue, 4> Operands(N->getNumOperands());

  // If ResNE is 0, fully unroll the vector op.
  if (ResNE == 0)
    ResNE = NE;
  else if (NE > ResNE)
    NE = ResNE;

  //The results of each unrolled operation, including the chain.
  SDVTList ChainVTs = DAG.getVTList(EltVT, MVT::Other);

  unsigned i;
  for (i = 0; i != NE; ++i) {
    // Every scalar op consumes the incoming chain...
    Operands[0] = Chain;
    for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) {
      SDValue Operand = N->getOperand(j);
      EVT OperandVT = Operand.getValueType();
      if (OperandVT.isVector()) {
        // ...and lane i of every vector operand.
        EVT OperandEltVT = OperandVT.getVectorElementType();
        Operands[j] = DAG.getExtractVectorElt(dl, OperandEltVT, Operand, i);
      } else {
        // Scalar operands are shared by all lanes.
        Operands[j] = Operand;
      }
    }
    SDValue Scalar =
        DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands, N->getFlags());

    //Add in the scalar as well as its chain value to the
    //result vectors.
    Scalars.push_back(Scalar);
    Chains.push_back(Scalar.getValue(1));
  }

  // Pad any remaining result lanes with poison.
  for (; i < ResNE; ++i)
    Scalars.push_back(DAG.getPOISON(EltVT));

  // Build a new factor node to connect the chain back together.
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
  ReplaceValueWith(SDValue(N, 1), Chain);

  // Create a new BUILD_VECTOR node
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE);
  return DAG.getBuildVector(VecVT, dl, Scalars);
}
2199
2200void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
2201 SDValue &Lo, SDValue &Hi) {
2202 SDLoc dl(N);
2203 EVT ResVT = N->getValueType(0);
2204 EVT OvVT = N->getValueType(1);
2205 EVT LoResVT, HiResVT, LoOvVT, HiOvVT;
2206 std::tie(LoResVT, HiResVT) = DAG.GetSplitDestVTs(ResVT);
2207 std::tie(LoOvVT, HiOvVT) = DAG.GetSplitDestVTs(OvVT);
2208
2209 SDValue LoLHS, HiLHS, LoRHS, HiRHS;
2210 if (getTypeAction(ResVT) == TargetLowering::TypeSplitVector) {
2211 GetSplitVector(N->getOperand(0), LoLHS, HiLHS);
2212 GetSplitVector(N->getOperand(1), LoRHS, HiRHS);
2213 } else {
2214 std::tie(LoLHS, HiLHS) = DAG.SplitVectorOperand(N, 0);
2215 std::tie(LoRHS, HiRHS) = DAG.SplitVectorOperand(N, 1);
2216 }
2217
2218 unsigned Opcode = N->getOpcode();
2219 SDVTList LoVTs = DAG.getVTList(LoResVT, LoOvVT);
2220 SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT);
2221 SDNode *LoNode =
2222 DAG.getNode(Opcode, dl, LoVTs, {LoLHS, LoRHS}, N->getFlags()).getNode();
2223 SDNode *HiNode =
2224 DAG.getNode(Opcode, dl, HiVTs, {HiLHS, HiRHS}, N->getFlags()).getNode();
2225
2226 Lo = SDValue(LoNode, ResNo);
2227 Hi = SDValue(HiNode, ResNo);
2228
2229 // Replace the other vector result not being explicitly split here.
2230 unsigned OtherNo = 1 - ResNo;
2231 EVT OtherVT = N->getValueType(OtherNo);
2232 if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) {
2233 SetSplitVector(SDValue(N, OtherNo),
2234 SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
2235 } else {
2236 SDValue OtherVal = DAG.getNode(
2237 ISD::CONCAT_VECTORS, dl, OtherVT,
2238 SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
2239 ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
2240 }
2241}
2242
void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
                                                     SDValue &Hi) {
  // Split the result of INSERT_VECTOR_ELT. With a suitable constant index
  // the insert is forwarded to the half containing the lane; otherwise the
  // vector is spilled to a stack slot, the element is stored into it, and
  // both halves are reloaded.
  SDValue Vec = N->getOperand(0);
  SDValue Elt = N->getOperand(1);
  SDValue Idx = N->getOperand(2);
  SDLoc dl(N);
  GetSplitVector(Vec, Lo, Hi);

  if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
    unsigned IdxVal = CIdx->getZExtValue();
    unsigned LoNumElts = Lo.getValueType().getVectorMinNumElements();
    if (IdxVal < LoNumElts) {
      Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
                       Lo.getValueType(), Lo, Elt, Idx);
      return;
    } else if (!Vec.getValueType().isScalableVector()) {
      // For a scalable vector we cannot prove the lane falls in the high
      // half, so only fixed-length vectors take this path.
      Hi = DAG.getInsertVectorElt(dl, Hi, Elt, IdxVal - LoNumElts);
      return;
    }
  }

  // Make the vector elements byte-addressable if they aren't already.
  EVT VecVT = Vec.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  if (!EltVT.isByteSized()) {
    EltVT = EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext());
    VecVT = VecVT.changeElementType(*DAG.getContext(), EltVT);
    Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
    // Extend the element type to match if needed.
    if (EltVT.bitsGT(Elt.getValueType()))
      Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt);
  }

  // Spill the vector to the stack.
  // In cases where the vector is illegal it will be broken down into parts
  // and stored in parts - we should use the alignment for the smallest part.
  Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
      DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
                               SmallestAlign);

  // Store the new element. This may be larger than the vector element type,
  // so use a truncating store.
  SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
  Store = DAG.getTruncStore(
      Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT,
      commonAlignment(SmallestAlign,
                      EltVT.getFixedSizeInBits() / 8));

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);

  // Load the Lo part from the stack slot.
  Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign);

  // Increment the pointer to the other part.
  auto Load = cast<LoadSDNode>(Lo);
  MachinePointerInfo MPI = Load->getPointerInfo();
  IncrementPointer(Load, LoVT, MPI, StackPtr);

  Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, MPI, SmallestAlign);

  // If we adjusted the original type, we need to truncate the results.
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
  if (LoVT != Lo.getValueType())
    Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo);
  if (HiVT != Hi.getValueType())
    Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}
2317
2318void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo,
2319 SDValue &Hi) {
2320 EVT LoVT, HiVT;
2321 SDLoc dl(N);
2322 assert(N->getValueType(0).isScalableVector() &&
2323 "Only scalable vectors are supported for STEP_VECTOR");
2324 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
2325 SDValue Step = N->getOperand(0);
2326
2327 Lo = DAG.getNode(ISD::STEP_VECTOR, dl, LoVT, Step);
2328
2329 // Hi = Lo + (EltCnt * Step)
2330 EVT EltVT = Step.getValueType();
2331 APInt StepVal = Step->getAsAPIntVal();
2332 SDValue StartOfHi =
2333 DAG.getVScale(dl, EltVT, StepVal * LoVT.getVectorMinNumElements());
2334 StartOfHi = DAG.getSExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType());
2335 StartOfHi = DAG.getNode(ISD::SPLAT_VECTOR, dl, HiVT, StartOfHi);
2336
2337 Hi = DAG.getNode(ISD::STEP_VECTOR, dl, HiVT, Step);
2338 Hi = DAG.getNode(ISD::ADD, dl, HiVT, Hi, StartOfHi);
2339}
2340
2341void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
2342 SDValue &Hi) {
2343 EVT LoVT, HiVT;
2344 SDLoc dl(N);
2345 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
2346 Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0));
2347 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
2348 Hi = DAG.getPOISON(HiVT);
2349 } else {
2350 assert(N->getOpcode() == ISD::SPLAT_VECTOR && "Unexpected opcode");
2351 Hi = Lo;
2352 }
2353}
2354
2355void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo,
2356 SDValue &Hi) {
2357 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
2358 "Extended load during type legalization!");
2359 SDLoc dl(LD);
2360 EVT VT = LD->getValueType(0);
2361 EVT LoVT, HiVT;
2362 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
2363
2364 SDValue Ch = LD->getChain();
2365 SDValue Ptr = LD->getBasePtr();
2366
2367 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
2368 EVT MemIntVT =
2369 EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits());
2370 SDValue ALD = DAG.getAtomicLoad(LD->getExtensionType(), dl, MemIntVT, IntVT,
2371 Ch, Ptr, LD->getMemOperand());
2372
2373 EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
2374 EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
2375 SDValue ExtractLo, ExtractHi;
2376 SplitInteger(ALD, LoIntVT, HiIntVT, ExtractLo, ExtractHi);
2377
2378 Lo = DAG.getBitcast(LoVT, ExtractLo);
2379 Hi = DAG.getBitcast(HiVT, ExtractHi);
2380
2381 // Legalize the chain result - switch anything that used the old chain to
2382 // use the new one.
2383 ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1));
2384}
2385
2386void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
2387 SDValue &Hi) {
2388 assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
2389 EVT LoVT, HiVT;
2390 SDLoc dl(LD);
2391 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
2392
2393 ISD::LoadExtType ExtType = LD->getExtensionType();
2394 SDValue Ch = LD->getChain();
2395 SDValue Ptr = LD->getBasePtr();
2396 SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
2397 EVT MemoryVT = LD->getMemoryVT();
2398 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
2399 AAMDNodes AAInfo = LD->getAAInfo();
2400
2401 EVT LoMemVT, HiMemVT;
2402 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
2403
2404 if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) {
2405 SDValue Value, NewChain;
2406 std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
2407 std::tie(Lo, Hi) = DAG.SplitVector(Value, dl);
2408 ReplaceValueWith(SDValue(LD, 1), NewChain);
2409 return;
2410 }
2411
2412 Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
2413 LD->getPointerInfo(), LoMemVT, LD->getBaseAlign(), MMOFlags,
2414 AAInfo);
2415
2416 MachinePointerInfo MPI;
2417 IncrementPointer(LD, LoMemVT, MPI, Ptr);
2418
2419 Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI,
2420 HiMemVT, LD->getBaseAlign(), MMOFlags, AAInfo);
2421
2422 // Build a factor node to remember that this load is independent of the
2423 // other one.
2424 Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
2425 Hi.getValue(1));
2426
2427 // Legalize the chain result - switch anything that used the old chain to
2428 // use the new one.
2429 ReplaceValueWith(SDValue(LD, 1), Ch);
2430}
2431
void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
                                           SDValue &Hi) {
  // Split a VP load: the mask and explicit vector length are split alongside
  // the result, and the high-half load (when it has any storage) reads from
  // an incremented address.
  assert(LD->isUnindexed() && "Indexed VP load during type legalization!");
  EVT LoVT, HiVT;
  SDLoc dl(LD);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));

  ISD::LoadExtType ExtType = LD->getExtensionType();
  SDValue Ch = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  SDValue Offset = LD->getOffset();
  assert(Offset.isUndef() && "Unexpected indexed variable-length load offset");
  Align Alignment = LD->getBaseAlign();
  SDValue Mask = LD->getMask();
  SDValue EVL = LD->getVectorLength();
  EVT MemoryVT = LD->getMemoryVT();

  EVT LoMemVT, HiMemVT;
  // HiIsEmpty is set when the high half requires no storage at all.
  bool HiIsEmpty = false;
  std::tie(LoMemVT, HiMemVT) =
      DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty);

  // Split Mask operand
  SDValue MaskLo, MaskHi;
  if (Mask.getOpcode() == ISD::SETCC) {
    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
  } else {
    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
      GetSplitVector(Mask, MaskLo, MaskHi);
    else
      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
  }

  // Split EVL operand
  SDValue EVLLo, EVLHi;
  std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, LD->getValueType(0), dl);

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      LD->getPointerInfo(), MachineMemOperand::MOLoad,
      LocationSize::beforeOrAfterPointer(), Alignment, LD->getAAInfo(),
      LD->getRanges());

  Lo =
      DAG.getLoadVP(LD->getAddressingMode(), ExtType, LoVT, dl, Ch, Ptr, Offset,
                    MaskLo, EVLLo, LoMemVT, MMO, LD->isExpandingLoad());

  if (HiIsEmpty) {
    // The hi vp_load has zero storage size. We therefore simply set it to
    // the low vp_load and rely on subsequent removal from the chain.
    Hi = Lo;
  } else {
    // Generate hi vp_load.
    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
                                     LD->isExpandingLoad());

    // For scalable memory types the byte offset is not a compile-time
    // constant, so only the address space is preserved in the pointer info.
    MachinePointerInfo MPI;
    if (LoMemVT.isScalableVector())
      MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace());
    else
      MPI = LD->getPointerInfo().getWithOffset(
          LoMemVT.getStoreSize().getFixedValue());

    MMO = DAG.getMachineFunction().getMachineMemOperand(
        Alignment, LD->getAAInfo(), LD->getRanges());

    Hi = DAG.getLoadVP(LD->getAddressingMode(), ExtType, HiVT, dl, Ch, Ptr,
                       Offset, MaskHi, EVLHi, HiMemVT, MMO,
                       LD->isExpandingLoad());
  }

  // Build a factor node to remember that this load is independent of the
  // other one.
  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                   Hi.getValue(1));

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(LD, 1), Ch);
}
2512
2513void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo,
2514 SDValue &Hi) {
2515 SDLoc dl(LD);
2516 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(LD->getValueType(0));
2517
2518 SDValue Ch = LD->getChain();
2519 SDValue Ptr = LD->getBasePtr();
2520 Align Alignment = LD->getBaseAlign();
2521 SDValue Mask = LD->getMask();
2522 SDValue EVL = LD->getVectorLength();
2523
2524 // Split Mask operand
2525 SDValue MaskLo, MaskHi;
2526 if (Mask.getOpcode() == ISD::SETCC) {
2527 SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
2528 } else {
2529 if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
2530 GetSplitVector(Mask, MaskLo, MaskHi);
2531 else
2532 std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
2533 }
2534
2535 // Split EVL operand
2536 auto [EVLLo, EVLHi] = DAG.SplitEVL(EVL, LD->getValueType(0), dl);
2537
2538 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
2539 LD->getPointerInfo(), MachineMemOperand::MOLoad,
2540 LocationSize::beforeOrAfterPointer(), Alignment, LD->getAAInfo(),
2541 LD->getRanges());
2542
2543 Lo = DAG.getLoadFFVP(LoVT, dl, Ch, Ptr, MaskLo, EVLLo, MMO);
2544
2545 // Fill the upper half with poison.
2546 Hi = DAG.getPOISON(HiVT);
2547
2548 ReplaceValueWith(SDValue(LD, 1), Lo.getValue(1));
2549 ReplaceValueWith(SDValue(LD, 2), Lo.getValue(2));
2550}
2551
void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD,
                                                   SDValue &Lo, SDValue &Hi) {
  // Split a VP strided load: the high half starts LoEVL * Stride bytes past
  // the low base address, with mask and EVL split alongside the result.
  assert(SLD->isUnindexed() &&
         "Indexed VP strided load during type legalization!");
  assert(SLD->getOffset().isUndef() &&
         "Unexpected indexed variable-length load offset");

  SDLoc DL(SLD);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(SLD->getValueType(0));

  EVT LoMemVT, HiMemVT;
  // HiIsEmpty is set when the high half requires no storage at all.
  bool HiIsEmpty = false;
  std::tie(LoMemVT, HiMemVT) =
      DAG.GetDependentSplitDestVTs(SLD->getMemoryVT(), LoVT, &HiIsEmpty);

  // Split the mask, reusing an existing SETCC or vector split when possible.
  SDValue Mask = SLD->getMask();
  SDValue LoMask, HiMask;
  if (Mask.getOpcode() == ISD::SETCC) {
    SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask);
  } else {
    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
      GetSplitVector(Mask, LoMask, HiMask);
    else
      std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
  }

  // Split the explicit vector length.
  SDValue LoEVL, HiEVL;
  std::tie(LoEVL, HiEVL) =
      DAG.SplitEVL(SLD->getVectorLength(), SLD->getValueType(0), DL);

  // Generate the low vp_strided_load
  Lo = DAG.getStridedLoadVP(
      SLD->getAddressingMode(), SLD->getExtensionType(), LoVT, DL,
      SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(), SLD->getStride(),
      LoMask, LoEVL, LoMemVT, SLD->getMemOperand(), SLD->isExpandingLoad());

  if (HiIsEmpty) {
    // The high vp_strided_load has zero storage size. We therefore simply set
    // it to the low vp_strided_load and rely on subsequent removal from the
    // chain.
    Hi = Lo;
  } else {
    // Generate the high vp_strided_load.
    // To calculate the high base address, we need to sum to the low base
    // address stride number of bytes for each element already loaded by low,
    // that is: Ptr = Ptr + (LoEVL * Stride)
    EVT PtrVT = SLD->getBasePtr().getValueType();
        DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL,
                    DAG.getSExtOrTrunc(SLD->getStride(), DL, PtrVT));
    SDValue Ptr =
        DAG.getNode(ISD::ADD, DL, PtrVT, SLD->getBasePtr(), Increment);

    Align Alignment = SLD->getBaseAlign();
    if (LoMemVT.isScalableVector())
      Alignment = commonAlignment(
          Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8);

    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
        MachinePointerInfo(SLD->getPointerInfo().getAddrSpace()),
        Alignment, SLD->getAAInfo(), SLD->getRanges());

    Hi = DAG.getStridedLoadVP(SLD->getAddressingMode(), SLD->getExtensionType(),
                              HiVT, DL, SLD->getChain(), Ptr, SLD->getOffset(),
                              SLD->getStride(), HiMask, HiEVL, HiMemVT, MMO,
                              SLD->isExpandingLoad());
  }

  // Build a factor node to remember that this load is independent of the
  // other one.
  SDValue Ch = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(SLD, 1), Ch);
}
2632
void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
                                         SDValue &Lo, SDValue &Hi) {
  // Split a masked load: mask and pass-through are split alongside the
  // result, and the high-half load (when it has any storage) reads from an
  // incremented address.
  assert(MLD->isUnindexed() && "Indexed masked load during type legalization!");
  EVT LoVT, HiVT;
  SDLoc dl(MLD);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));

  SDValue Ch = MLD->getChain();
  SDValue Ptr = MLD->getBasePtr();
  SDValue Offset = MLD->getOffset();
  assert(Offset.isUndef() && "Unexpected indexed masked load offset");
  SDValue Mask = MLD->getMask();
  SDValue PassThru = MLD->getPassThru();
  Align Alignment = MLD->getBaseAlign();
  ISD::LoadExtType ExtType = MLD->getExtensionType();
  MachineMemOperand::Flags MMOFlags = MLD->getMemOperand()->getFlags();

  // Split Mask operand
  SDValue MaskLo, MaskHi;
  if (Mask.getOpcode() == ISD::SETCC) {
    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
  } else {
    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
      GetSplitVector(Mask, MaskLo, MaskHi);
    else
      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
  }

  EVT MemoryVT = MLD->getMemoryVT();
  EVT LoMemVT, HiMemVT;
  // HiIsEmpty is set when the high half requires no storage at all.
  bool HiIsEmpty = false;
  std::tie(LoMemVT, HiMemVT) =
      DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty);

  // Split the pass-through value, reusing an existing split when available.
  SDValue PassThruLo, PassThruHi;
  if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
    GetSplitVector(PassThru, PassThruLo, PassThruHi);
  else
    std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      Alignment, MLD->getAAInfo(), MLD->getRanges());

  Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
                         MMO, MLD->getAddressingMode(), ExtType,
                         MLD->isExpandingLoad());

  if (HiIsEmpty) {
    // The hi masked load has zero storage size. We therefore simply set it to
    // the low masked load and rely on subsequent removal from the chain.
    Hi = Lo;
  } else {
    // Generate hi masked load.
    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
                                     MLD->isExpandingLoad());

    // For scalable memory types the byte offset is not a compile-time
    // constant, so only the address space is preserved in the pointer info.
    MachinePointerInfo MPI;
    if (LoMemVT.isScalableVector())
      MPI = MachinePointerInfo(MLD->getPointerInfo().getAddrSpace());
    else
      MPI = MLD->getPointerInfo().getWithOffset(
          LoMemVT.getStoreSize().getFixedValue());

    MMO = DAG.getMachineFunction().getMachineMemOperand(
        MPI, MMOFlags, LocationSize::beforeOrAfterPointer(), Alignment,
        MLD->getAAInfo(), MLD->getRanges());

    Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
                           HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
                           MLD->isExpandingLoad());
  }

  // Build a factor node to remember that this load is independent of the
  // other one.
  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                   Hi.getValue(1));

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(MLD, 1), Ch);

}
2716
2717void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
2718 SDValue &Hi, bool SplitSETCC) {
2719 EVT LoVT, HiVT;
2720 SDLoc dl(N);
2721 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
2722
2723 SDValue Ch = N->getChain();
2724 SDValue Ptr = N->getBasePtr();
2725 struct Operands {
2726 SDValue Mask;
2727 SDValue Index;
2728 SDValue Scale;
2729 } Ops = [&]() -> Operands {
2730 if (auto *MSC = dyn_cast<MaskedGatherSDNode>(N)) {
2731 return {MSC->getMask(), MSC->getIndex(), MSC->getScale()};
2732 }
2733 auto *VPSC = cast<VPGatherSDNode>(N);
2734 return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale()};
2735 }();
2736
2737 EVT MemoryVT = N->getMemoryVT();
2738 Align Alignment = N->getBaseAlign();
2739
2740 // Split Mask operand
2741 SDValue MaskLo, MaskHi;
2742 if (SplitSETCC && Ops.Mask.getOpcode() == ISD::SETCC) {
2743 SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
2744 } else {
2745 std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, dl);
2746 }
2747
2748 EVT LoMemVT, HiMemVT;
2749 // Split MemoryVT
2750 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
2751
2752 SDValue IndexHi, IndexLo;
2753 if (getTypeAction(Ops.Index.getValueType()) ==
2755 GetSplitVector(Ops.Index, IndexLo, IndexHi);
2756 else
2757 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl);
2758
2759 MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
2760 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
2761 N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
2762 Alignment, N->getAAInfo(), N->getRanges());
2763
2764 if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
2765 SDValue PassThru = MGT->getPassThru();
2766 SDValue PassThruLo, PassThruHi;
2767 if (getTypeAction(PassThru.getValueType()) ==
2769 GetSplitVector(PassThru, PassThruLo, PassThruHi);
2770 else
2771 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
2772
2773 ISD::LoadExtType ExtType = MGT->getExtensionType();
2774 ISD::MemIndexType IndexTy = MGT->getIndexType();
2775
2776 SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Ops.Scale};
2777 Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
2778 OpsLo, MMO, IndexTy, ExtType);
2779
2780 SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Ops.Scale};
2781 Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
2782 OpsHi, MMO, IndexTy, ExtType);
2783 } else {
2784 auto *VPGT = cast<VPGatherSDNode>(N);
2785 SDValue EVLLo, EVLHi;
2786 std::tie(EVLLo, EVLHi) =
2787 DAG.SplitEVL(VPGT->getVectorLength(), MemoryVT, dl);
2788
2789 SDValue OpsLo[] = {Ch, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
2790 Lo = DAG.getGatherVP(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
2791 MMO, VPGT->getIndexType());
2792
2793 SDValue OpsHi[] = {Ch, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
2794 Hi = DAG.getGatherVP(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
2795 MMO, VPGT->getIndexType());
2796 }
2797
2798 // Build a factor node to remember that this load is independent of the
2799 // other one.
2800 Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
2801 Hi.getValue(1));
2802
2803 // Legalize the chain result - switch anything that used the old chain to
2804 // use the new one.
2805 ReplaceValueWith(SDValue(N, 1), Ch);
2806}
2807
2808void DAGTypeLegalizer::SplitVecRes_VECTOR_COMPRESS(SDNode *N, SDValue &Lo,
2809 SDValue &Hi) {
2810 // This is not "trivial", as there is a dependency between the two subvectors.
2811 // Depending on the number of 1s in the mask, the elements from the Hi vector
2812 // need to be moved to the Lo vector. Passthru values make this even harder.
2813 // We try to use VECTOR_COMPRESS if the target has custom lowering with
2814 // smaller types and passthru is undef, as it is most likely faster than the
2815 // fully expand path. Otherwise, just do the full expansion as one "big"
2816 // operation and then extract the Lo and Hi vectors from that. This gets
2817 // rid of VECTOR_COMPRESS and all other operands can be legalized later.
2818 SDLoc DL(N);
2819 EVT VecVT = N->getValueType(0);
2820
2821 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
2822 bool HasCustomLowering = false;
2823 EVT CheckVT = LoVT;
2824 while (CheckVT.getVectorMinNumElements() > 1) {
2825 // TLI.isOperationLegalOrCustom requires a legal type, but we could have a
2826 // custom lowering for illegal types. So we do the checks separately.
2827 if (TLI.isOperationLegal(ISD::VECTOR_COMPRESS, CheckVT) ||
2828 TLI.isOperationCustom(ISD::VECTOR_COMPRESS, CheckVT)) {
2829 HasCustomLowering = true;
2830 break;
2831 }
2832 CheckVT = CheckVT.getHalfNumVectorElementsVT(*DAG.getContext());
2833 }
2834
2835 SDValue Passthru = N->getOperand(2);
2836 if (!HasCustomLowering) {
2837 SDValue Compressed = TLI.expandVECTOR_COMPRESS(N, DAG);
2838 std::tie(Lo, Hi) = DAG.SplitVector(Compressed, DL, LoVT, HiVT);
2839 return;
2840 }
2841
2842 // Try to VECTOR_COMPRESS smaller vectors and combine via a stack store+load.
2843 SDValue Mask = N->getOperand(1);
2844 SDValue LoMask, HiMask;
2845 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
2846 std::tie(LoMask, HiMask) = SplitMask(Mask);
2847
2848 SDValue UndefPassthru = DAG.getPOISON(LoVT);
2849 Lo = DAG.getNode(ISD::VECTOR_COMPRESS, DL, LoVT, Lo, LoMask, UndefPassthru);
2850 Hi = DAG.getNode(ISD::VECTOR_COMPRESS, DL, HiVT, Hi, HiMask, UndefPassthru);
2851
2852 SDValue StackPtr = DAG.CreateStackTemporary(
2853 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
2854 MachineFunction &MF = DAG.getMachineFunction();
2855 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(
2856 MF, cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex());
2857
2858 EVT MaskVT = LoMask.getValueType();
2859 assert(MaskVT.getScalarType() == MVT::i1 && "Expected vector of i1s");
2860
2861 // We store LoVec and then insert HiVec starting at offset=|1s| in LoMask.
2862 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
2863 MaskVT.getVectorElementCount());
2864 SDValue WideMask = DAG.getNode(ISD::ZERO_EXTEND, DL, WideMaskVT, LoMask);
2865 SDValue Offset = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, WideMask);
2866 Offset = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Offset);
2867
2868 SDValue Chain = DAG.getEntryNode();
2869 Chain = DAG.getStore(Chain, DL, Lo, StackPtr, PtrInfo);
2870 Chain = DAG.getStore(Chain, DL, Hi, Offset,
2872
2873 SDValue Compressed = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
2874 if (!Passthru.isUndef()) {
2875 Compressed =
2876 DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, Compressed, Passthru);
2877 }
2878 std::tie(Lo, Hi) = DAG.SplitVector(Compressed, DL);
2879}
2880
2881void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
2882 assert(N->getValueType(0).isVector() &&
2883 N->getOperand(0).getValueType().isVector() &&
2884 "Operand types must be vectors");
2885
2886 EVT LoVT, HiVT;
2887 SDLoc DL(N);
2888 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
2889
2890 // If the input also splits, handle it directly. Otherwise split it by hand.
2891 SDValue LL, LH, RL, RH;
2892 if (getTypeAction(N->getOperand(0).getValueType()) ==
2894 GetSplitVector(N->getOperand(0), LL, LH);
2895 else
2896 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
2897
2898 if (getTypeAction(N->getOperand(1).getValueType()) ==
2900 GetSplitVector(N->getOperand(1), RL, RH);
2901 else
2902 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
2903
2904 if (N->getOpcode() == ISD::SETCC) {
2905 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
2906 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
2907 } else {
2908 assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
2909 SDValue MaskLo, MaskHi, EVLLo, EVLHi;
2910 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
2911 std::tie(EVLLo, EVLHi) =
2912 DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL);
2913 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2), MaskLo,
2914 EVLLo);
2915 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2), MaskHi,
2916 EVLHi);
2917 }
2918}
2919
2920void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
2921 SDValue &Hi) {
2922 // Get the dest types - they may not match the input types, e.g. int_to_fp.
2923 EVT LoVT, HiVT;
2924 SDLoc dl(N);
2925 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
2926
2927 // If the input also splits, handle it directly for a compile time speedup.
2928 // Otherwise split it by hand.
2929 EVT InVT = N->getOperand(0).getValueType();
2930 if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
2931 GetSplitVector(N->getOperand(0), Lo, Hi);
2932 else
2933 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
2934
2935 const SDNodeFlags Flags = N->getFlags();
2936 unsigned Opcode = N->getOpcode();
2937 if (N->getNumOperands() <= 2) {
2938 if (Opcode == ISD::FP_ROUND || Opcode == ISD::AssertNoFPClass ||
2940 Lo = DAG.getNode(Opcode, dl, LoVT, Lo, N->getOperand(1), Flags);
2941 Hi = DAG.getNode(Opcode, dl, HiVT, Hi, N->getOperand(1), Flags);
2942 } else {
2943 Lo = DAG.getNode(Opcode, dl, LoVT, Lo, Flags);
2944 Hi = DAG.getNode(Opcode, dl, HiVT, Hi, Flags);
2945 }
2946 return;
2947 }
2948
2949 assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
2950 assert(N->isVPOpcode() && "Expected VP opcode");
2951
2952 SDValue MaskLo, MaskHi;
2953 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
2954
2955 SDValue EVLLo, EVLHi;
2956 std::tie(EVLLo, EVLHi) =
2957 DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
2958
2959 Lo = DAG.getNode(Opcode, dl, LoVT, {Lo, MaskLo, EVLLo}, Flags);
2960 Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags);
2961}
2962
2963void DAGTypeLegalizer::SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo,
2964 SDValue &Hi) {
2965 SDLoc dl(N);
2966 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
2967
2968 // If the input also splits, handle it directly for a compile time speedup.
2969 // Otherwise split it by hand.
2970 EVT InVT = N->getOperand(0).getValueType();
2971 if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
2972 GetSplitVector(N->getOperand(0), Lo, Hi);
2973 else
2974 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
2975
2976 auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N);
2977 unsigned SrcAS = AddrSpaceCastN->getSrcAddressSpace();
2978 unsigned DestAS = AddrSpaceCastN->getDestAddressSpace();
2979 Lo = DAG.getAddrSpaceCast(dl, LoVT, Lo, SrcAS, DestAS);
2980 Hi = DAG.getAddrSpaceCast(dl, HiVT, Hi, SrcAS, DestAS);
2981}
2982
2983void DAGTypeLegalizer::SplitVecRes_UnaryOpWithTwoResults(SDNode *N,
2984 unsigned ResNo,
2985 SDValue &Lo,
2986 SDValue &Hi) {
2987 SDLoc dl(N);
2988 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
2989 auto [LoVT1, HiVT1] = DAG.GetSplitDestVTs(N->getValueType(1));
2990
2991 // If the input also splits, handle it directly for a compile time speedup.
2992 // Otherwise split it by hand.
2993 EVT InVT = N->getOperand(0).getValueType();
2994 if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
2995 GetSplitVector(N->getOperand(0), Lo, Hi);
2996 else
2997 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
2998
2999 Lo = DAG.getNode(N->getOpcode(), dl, {LoVT, LoVT1}, Lo, N->getFlags());
3000 Hi = DAG.getNode(N->getOpcode(), dl, {HiVT, HiVT1}, Hi, N->getFlags());
3001
3002 SDNode *HiNode = Hi.getNode();
3003 SDNode *LoNode = Lo.getNode();
3004
3005 // Replace the other vector result not being explicitly split here.
3006 unsigned OtherNo = 1 - ResNo;
3007 EVT OtherVT = N->getValueType(OtherNo);
3008 if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) {
3009 SetSplitVector(SDValue(N, OtherNo), SDValue(LoNode, OtherNo),
3010 SDValue(HiNode, OtherNo));
3011 } else {
3012 SDValue OtherVal =
3013 DAG.getNode(ISD::CONCAT_VECTORS, dl, OtherVT, SDValue(LoNode, OtherNo),
3014 SDValue(HiNode, OtherNo));
3015 ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
3016 }
3017}
3018
3019void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
3020 SDValue &Hi) {
3021 SDLoc dl(N);
3022 EVT SrcVT = N->getOperand(0).getValueType();
3023 EVT DestVT = N->getValueType(0);
3024 EVT LoVT, HiVT;
3025 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
3026
3027 // We can do better than a generic split operation if the extend is doing
3028 // more than just doubling the width of the elements and the following are
3029 // true:
3030 // - The number of vector elements is even,
3031 // - the source type is legal,
3032 // - the type of a split source is illegal,
3033 // - the type of an extended (by doubling element size) source is legal, and
3034 // - the type of that extended source when split is legal.
3035 //
3036 // This won't necessarily completely legalize the operation, but it will
3037 // more effectively move in the right direction and prevent falling down
3038 // to scalarization in many cases due to the input vector being split too
3039 // far.
3040 if (SrcVT.getVectorElementCount().isKnownEven() &&
3041 SrcVT.getScalarSizeInBits() * 2 < DestVT.getScalarSizeInBits()) {
3042 LLVMContext &Ctx = *DAG.getContext();
3043 EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx);
3044 EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx);
3045
3046 EVT SplitLoVT, SplitHiVT;
3047 std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
3048 if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
3049 TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
3050 LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:";
3051 N->dump(&DAG); dbgs() << "\n");
3052 if (!N->isVPOpcode()) {
3053 // Extend the source vector by one step.
3054 SDValue NewSrc =
3055 DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
3056 // Get the low and high halves of the new, extended one step, vector.
3057 std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
3058 // Extend those vector halves the rest of the way.
3059 Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
3060 Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
3061 return;
3062 }
3063
3064 // Extend the source vector by one step.
3065 SDValue NewSrc =
3066 DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0),
3067 N->getOperand(1), N->getOperand(2));
3068 // Get the low and high halves of the new, extended one step, vector.
3069 std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
3070
3071 SDValue MaskLo, MaskHi;
3072 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
3073
3074 SDValue EVLLo, EVLHi;
3075 std::tie(EVLLo, EVLHi) =
3076 DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
3077 // Extend those vector halves the rest of the way.
3078 Lo = DAG.getNode(N->getOpcode(), dl, LoVT, {Lo, MaskLo, EVLLo});
3079 Hi = DAG.getNode(N->getOpcode(), dl, HiVT, {Hi, MaskHi, EVLHi});
3080 return;
3081 }
3082 }
3083 // Fall back to the generic unary operator splitting otherwise.
3084 SplitVecRes_UnaryOp(N, Lo, Hi);
3085}
3086
3087void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
3088 SDValue &Lo, SDValue &Hi) {
3089 // The low and high parts of the original input give four input vectors.
3090 SDValue Inputs[4];
3091 SDLoc DL(N);
3092 GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
3093 GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
3094 EVT NewVT = Inputs[0].getValueType();
3095 unsigned NewElts = NewVT.getVectorNumElements();
3096
3097 auto &&IsConstant = [](const SDValue &N) {
3098 APInt SplatValue;
3099 return N.getResNo() == 0 &&
3100 (ISD::isConstantSplatVector(N.getNode(), SplatValue) ||
3102 };
3103 auto &&BuildVector = [NewElts, &DAG = DAG, NewVT, &DL](SDValue &Input1,
3104 SDValue &Input2,
3105 ArrayRef<int> Mask) {
3106 assert(Input1->getOpcode() == ISD::BUILD_VECTOR &&
3107 Input2->getOpcode() == ISD::BUILD_VECTOR &&
3108 "Expected build vector node.");
3109 EVT EltVT = NewVT.getVectorElementType();
3110 SmallVector<SDValue> Ops(NewElts, DAG.getPOISON(EltVT));
3111 for (unsigned I = 0; I < NewElts; ++I) {
3112 if (Mask[I] == PoisonMaskElem)
3113 continue;
3114 unsigned Idx = Mask[I];
3115 if (Idx >= NewElts)
3116 Ops[I] = Input2.getOperand(Idx - NewElts);
3117 else
3118 Ops[I] = Input1.getOperand(Idx);
3119 // Make the type of all elements the same as the element type.
3120 if (Ops[I].getValueType().bitsGT(EltVT))
3121 Ops[I] = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Ops[I]);
3122 }
3123 return DAG.getBuildVector(NewVT, DL, Ops);
3124 };
3125
3126 // If Lo or Hi uses elements from at most two of the four input vectors, then
3127 // express it as a vector shuffle of those two inputs. Otherwise extract the
3128 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
3129 SmallVector<int> OrigMask(N->getMask());
3130 // Try to pack incoming shuffles/inputs.
3131 auto &&TryPeekThroughShufflesInputs = [&Inputs, &NewVT, this, NewElts,
3132 &DL](SmallVectorImpl<int> &Mask) {
3133 // Check if all inputs are shuffles of the same operands or non-shuffles.
3134 MapVector<std::pair<SDValue, SDValue>, SmallVector<unsigned>> ShufflesIdxs;
3135 for (unsigned Idx = 0; Idx < std::size(Inputs); ++Idx) {
3136 SDValue Input = Inputs[Idx];
3137 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Input.getNode());
3138 if (!Shuffle ||
3139 Input.getOperand(0).getValueType() != Input.getValueType())
3140 continue;
3141 ShufflesIdxs[std::make_pair(Input.getOperand(0), Input.getOperand(1))]
3142 .push_back(Idx);
3143 ShufflesIdxs[std::make_pair(Input.getOperand(1), Input.getOperand(0))]
3144 .push_back(Idx);
3145 }
3146 for (auto &P : ShufflesIdxs) {
3147 if (P.second.size() < 2)
3148 continue;
3149 // Use shuffles operands instead of shuffles themselves.
3150 // 1. Adjust mask.
3151 for (int &Idx : Mask) {
3152 if (Idx == PoisonMaskElem)
3153 continue;
3154 unsigned SrcRegIdx = Idx / NewElts;
3155 if (Inputs[SrcRegIdx].isUndef()) {
3156 Idx = PoisonMaskElem;
3157 continue;
3158 }
3159 auto *Shuffle =
3160 dyn_cast<ShuffleVectorSDNode>(Inputs[SrcRegIdx].getNode());
3161 if (!Shuffle || !is_contained(P.second, SrcRegIdx))
3162 continue;
3163 int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
3164 if (MaskElt == PoisonMaskElem) {
3165 Idx = PoisonMaskElem;
3166 continue;
3167 }
3168 Idx = MaskElt % NewElts +
3169 P.second[Shuffle->getOperand(MaskElt / NewElts) == P.first.first
3170 ? 0
3171 : 1] *
3172 NewElts;
3173 }
3174 // 2. Update inputs.
3175 Inputs[P.second[0]] = P.first.first;
3176 Inputs[P.second[1]] = P.first.second;
3177 // Clear the pair data.
3178 P.second.clear();
3179 ShufflesIdxs[std::make_pair(P.first.second, P.first.first)].clear();
3180 }
3181 // Check if any concat_vectors can be simplified.
3182 SmallBitVector UsedSubVector(2 * std::size(Inputs));
3183 for (int &Idx : Mask) {
3184 if (Idx == PoisonMaskElem)
3185 continue;
3186 unsigned SrcRegIdx = Idx / NewElts;
3187 if (Inputs[SrcRegIdx].isUndef()) {
3188 Idx = PoisonMaskElem;
3189 continue;
3190 }
3192 getTypeAction(Inputs[SrcRegIdx].getValueType());
3193 if (Inputs[SrcRegIdx].getOpcode() == ISD::CONCAT_VECTORS &&
3194 Inputs[SrcRegIdx].getNumOperands() == 2 &&
3195 !Inputs[SrcRegIdx].getOperand(1).isUndef() &&
3196 (TypeAction == TargetLowering::TypeLegal ||
3197 TypeAction == TargetLowering::TypeWidenVector))
3198 UsedSubVector.set(2 * SrcRegIdx + (Idx % NewElts) / (NewElts / 2));
3199 }
3200 if (UsedSubVector.count() > 1) {
3202 for (unsigned I = 0; I < std::size(Inputs); ++I) {
3203 if (UsedSubVector.test(2 * I) == UsedSubVector.test(2 * I + 1))
3204 continue;
3205 if (Pairs.empty() || Pairs.back().size() == 2)
3206 Pairs.emplace_back();
3207 if (UsedSubVector.test(2 * I)) {
3208 Pairs.back().emplace_back(I, 0);
3209 } else {
3210 assert(UsedSubVector.test(2 * I + 1) &&
3211 "Expected to be used one of the subvectors.");
3212 Pairs.back().emplace_back(I, 1);
3213 }
3214 }
3215 if (!Pairs.empty() && Pairs.front().size() > 1) {
3216 // Adjust mask.
3217 for (int &Idx : Mask) {
3218 if (Idx == PoisonMaskElem)
3219 continue;
3220 unsigned SrcRegIdx = Idx / NewElts;
3221 auto *It = find_if(
3222 Pairs, [SrcRegIdx](ArrayRef<std::pair<unsigned, int>> Idxs) {
3223 return Idxs.front().first == SrcRegIdx ||
3224 Idxs.back().first == SrcRegIdx;
3225 });
3226 if (It == Pairs.end())
3227 continue;
3228 Idx = It->front().first * NewElts + (Idx % NewElts) % (NewElts / 2) +
3229 (SrcRegIdx == It->front().first ? 0 : (NewElts / 2));
3230 }
3231 // Adjust inputs.
3232 for (ArrayRef<std::pair<unsigned, int>> Idxs : Pairs) {
3233 Inputs[Idxs.front().first] = DAG.getNode(
3235 Inputs[Idxs.front().first].getValueType(),
3236 Inputs[Idxs.front().first].getOperand(Idxs.front().second),
3237 Inputs[Idxs.back().first].getOperand(Idxs.back().second));
3238 }
3239 }
3240 }
3241 bool Changed;
3242 do {
3243 // Try to remove extra shuffles (except broadcasts) and shuffles with the
3244 // reused operands.
3245 Changed = false;
3246 for (unsigned I = 0; I < std::size(Inputs); ++I) {
3247 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Inputs[I].getNode());
3248 if (!Shuffle)
3249 continue;
3250 if (Shuffle->getOperand(0).getValueType() != NewVT)
3251 continue;
3252 int Op = -1;
3253 if (!Inputs[I].hasOneUse() && Shuffle->getOperand(1).isUndef() &&
3254 !Shuffle->isSplat()) {
3255 Op = 0;
3256 } else if (!Inputs[I].hasOneUse() &&
3257 !Shuffle->getOperand(1).isUndef()) {
3258 // Find the only used operand, if possible.
3259 for (int &Idx : Mask) {
3260 if (Idx == PoisonMaskElem)
3261 continue;
3262 unsigned SrcRegIdx = Idx / NewElts;
3263 if (SrcRegIdx != I)
3264 continue;
3265 int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
3266 if (MaskElt == PoisonMaskElem) {
3267 Idx = PoisonMaskElem;
3268 continue;
3269 }
3270 int OpIdx = MaskElt / NewElts;
3271 if (Op == -1) {
3272 Op = OpIdx;
3273 continue;
3274 }
3275 if (Op != OpIdx) {
3276 Op = -1;
3277 break;
3278 }
3279 }
3280 }
3281 if (Op < 0) {
3282 // Try to check if one of the shuffle operands is used already.
3283 for (int OpIdx = 0; OpIdx < 2; ++OpIdx) {
3284 if (Shuffle->getOperand(OpIdx).isUndef())
3285 continue;
3286 auto *It = find(Inputs, Shuffle->getOperand(OpIdx));
3287 if (It == std::end(Inputs))
3288 continue;
3289 int FoundOp = std::distance(std::begin(Inputs), It);
3290 // Found that operand is used already.
3291 // 1. Fix the mask for the reused operand.
3292 for (int &Idx : Mask) {
3293 if (Idx == PoisonMaskElem)
3294 continue;
3295 unsigned SrcRegIdx = Idx / NewElts;
3296 if (SrcRegIdx != I)
3297 continue;
3298 int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
3299 if (MaskElt == PoisonMaskElem) {
3300 Idx = PoisonMaskElem;
3301 continue;
3302 }
3303 int MaskIdx = MaskElt / NewElts;
3304 if (OpIdx == MaskIdx)
3305 Idx = MaskElt % NewElts + FoundOp * NewElts;
3306 }
3307 // 2. Set Op to the unused OpIdx.
3308 Op = (OpIdx + 1) % 2;
3309 break;
3310 }
3311 }
3312 if (Op >= 0) {
3313 Changed = true;
3314 Inputs[I] = Shuffle->getOperand(Op);
3315 // Adjust mask.
3316 for (int &Idx : Mask) {
3317 if (Idx == PoisonMaskElem)
3318 continue;
3319 unsigned SrcRegIdx = Idx / NewElts;
3320 if (SrcRegIdx != I)
3321 continue;
3322 int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
3323 int OpIdx = MaskElt / NewElts;
3324 if (OpIdx != Op)
3325 continue;
3326 Idx = MaskElt % NewElts + SrcRegIdx * NewElts;
3327 }
3328 }
3329 }
3330 } while (Changed);
3331 };
3332 TryPeekThroughShufflesInputs(OrigMask);
3333 // Proces unique inputs.
3334 auto &&MakeUniqueInputs = [&Inputs, &IsConstant,
3335 NewElts](SmallVectorImpl<int> &Mask) {
3336 SetVector<SDValue> UniqueInputs;
3337 SetVector<SDValue> UniqueConstantInputs;
3338 for (const auto &I : Inputs) {
3339 if (IsConstant(I))
3340 UniqueConstantInputs.insert(I);
3341 else if (!I.isUndef())
3342 UniqueInputs.insert(I);
3343 }
3344 // Adjust mask in case of reused inputs. Also, need to insert constant
3345 // inputs at first, otherwise it affects the final outcome.
3346 if (UniqueInputs.size() != std::size(Inputs)) {
3347 auto &&UniqueVec = UniqueInputs.takeVector();
3348 auto &&UniqueConstantVec = UniqueConstantInputs.takeVector();
3349 unsigned ConstNum = UniqueConstantVec.size();
3350 for (int &Idx : Mask) {
3351 if (Idx == PoisonMaskElem)
3352 continue;
3353 unsigned SrcRegIdx = Idx / NewElts;
3354 if (Inputs[SrcRegIdx].isUndef()) {
3355 Idx = PoisonMaskElem;
3356 continue;
3357 }
3358 const auto It = find(UniqueConstantVec, Inputs[SrcRegIdx]);
3359 if (It != UniqueConstantVec.end()) {
3360 Idx = (Idx % NewElts) +
3361 NewElts * std::distance(UniqueConstantVec.begin(), It);
3362 assert(Idx >= 0 && "Expected defined mask idx.");
3363 continue;
3364 }
3365 const auto RegIt = find(UniqueVec, Inputs[SrcRegIdx]);
3366 assert(RegIt != UniqueVec.end() && "Cannot find non-const value.");
3367 Idx = (Idx % NewElts) +
3368 NewElts * (std::distance(UniqueVec.begin(), RegIt) + ConstNum);
3369 assert(Idx >= 0 && "Expected defined mask idx.");
3370 }
3371 copy(UniqueConstantVec, std::begin(Inputs));
3372 copy(UniqueVec, std::next(std::begin(Inputs), ConstNum));
3373 }
3374 };
3375 MakeUniqueInputs(OrigMask);
3376 SDValue OrigInputs[4];
3377 copy(Inputs, std::begin(OrigInputs));
3378 for (unsigned High = 0; High < 2; ++High) {
3379 SDValue &Output = High ? Hi : Lo;
3380
3381 // Build a shuffle mask for the output, discovering on the fly which
3382 // input vectors to use as shuffle operands.
3383 unsigned FirstMaskIdx = High * NewElts;
3384 SmallVector<int> Mask(NewElts * std::size(Inputs), PoisonMaskElem);
3385 copy(ArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
3386 assert(!Output && "Expected default initialized initial value.");
3387 TryPeekThroughShufflesInputs(Mask);
3388 MakeUniqueInputs(Mask);
3389 SDValue TmpInputs[4];
3390 copy(Inputs, std::begin(TmpInputs));
3391 // Track changes in the output registers.
3392 int UsedIdx = -1;
3393 bool SecondIteration = false;
3394 auto &&AccumulateResults = [&UsedIdx, &SecondIteration](unsigned Idx) {
3395 if (UsedIdx < 0) {
3396 UsedIdx = Idx;
3397 return false;
3398 }
3399 if (UsedIdx >= 0 && static_cast<unsigned>(UsedIdx) == Idx)
3400 SecondIteration = true;
3401 return SecondIteration;
3402 };
3404 Mask, std::size(Inputs), std::size(Inputs),
3405 /*NumOfUsedRegs=*/1,
3406 [&Output, &DAG = DAG, NewVT]() { Output = DAG.getPOISON(NewVT); },
3407 [&Output, &DAG = DAG, NewVT, &DL, &Inputs,
3408 &BuildVector](ArrayRef<int> Mask, unsigned Idx, unsigned /*Unused*/) {
3409 if (Inputs[Idx]->getOpcode() == ISD::BUILD_VECTOR)
3410 Output = BuildVector(Inputs[Idx], Inputs[Idx], Mask);
3411 else
3412 Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx],
3413 DAG.getPOISON(NewVT), Mask);
3414 Inputs[Idx] = Output;
3415 },
3416 [&AccumulateResults, &Output, &DAG = DAG, NewVT, &DL, &Inputs,
3417 &TmpInputs, &BuildVector](ArrayRef<int> Mask, unsigned Idx1,
3418 unsigned Idx2, bool /*Unused*/) {
3419 if (AccumulateResults(Idx1)) {
3420 if (Inputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
3421 Inputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)
3422 Output = BuildVector(Inputs[Idx1], Inputs[Idx2], Mask);
3423 else
3424 Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx1],
3425 Inputs[Idx2], Mask);
3426 } else {
3427 if (TmpInputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
3428 TmpInputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)
3429 Output = BuildVector(TmpInputs[Idx1], TmpInputs[Idx2], Mask);
3430 else
3431 Output = DAG.getVectorShuffle(NewVT, DL, TmpInputs[Idx1],
3432 TmpInputs[Idx2], Mask);
3433 }
3434 Inputs[Idx1] = Output;
3435 });
3436 copy(OrigInputs, std::begin(Inputs));
3437 }
3438}
3439
3440void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
3441 EVT OVT = N->getValueType(0);
3442 EVT NVT = OVT.getHalfNumVectorElementsVT(*DAG.getContext());
3443 SDValue Chain = N->getOperand(0);
3444 SDValue Ptr = N->getOperand(1);
3445 SDValue SV = N->getOperand(2);
3446 SDLoc dl(N);
3447
3448 const Align Alignment =
3449 DAG.getDataLayout().getABITypeAlign(NVT.getTypeForEVT(*DAG.getContext()));
3450
3451 Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment.value());
3452 Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment.value());
3453 Chain = Hi.getValue(1);
3454
3455 // Modified the chain - switch anything that used the old chain to use
3456 // the new one.
3457 ReplaceValueWith(SDValue(N, 1), Chain);
3458}
3459
3460void DAGTypeLegalizer::SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
3461 SDValue &Hi) {
3462 EVT DstVTLo, DstVTHi;
3463 std::tie(DstVTLo, DstVTHi) = DAG.GetSplitDestVTs(N->getValueType(0));
3464 SDLoc dl(N);
3465
3466 SDValue SrcLo, SrcHi;
3467 EVT SrcVT = N->getOperand(0).getValueType();
3468 if (getTypeAction(SrcVT) == TargetLowering::TypeSplitVector)
3469 GetSplitVector(N->getOperand(0), SrcLo, SrcHi);
3470 else
3471 std::tie(SrcLo, SrcHi) = DAG.SplitVectorOperand(N, 0);
3472
3473 Lo = DAG.getNode(N->getOpcode(), dl, DstVTLo, SrcLo, N->getOperand(1));
3474 Hi = DAG.getNode(N->getOpcode(), dl, DstVTHi, SrcHi, N->getOperand(1));
3475}
3476
3477void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
3478 SDValue &Hi) {
3479 SDValue InLo, InHi;
3480 GetSplitVector(N->getOperand(0), InLo, InHi);
3481 SDLoc DL(N);
3482
3483 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
3484 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
3485}
3486
3487void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
3488 SDValue &Hi) {
3489 SDLoc DL(N);
3490
3491 SDValue Expanded = TLI.expandVectorSplice(N, DAG);
3492 std::tie(Lo, Hi) = DAG.SplitVector(Expanded, DL);
3493}
3494
// Split the result of a VP_REVERSE by round-tripping through a stack slot:
// the elements are written in reverse order with a negative-stride
// VP strided store, reloaded contiguously, and the reloaded vector is split.
void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo,
                                              SDValue &Hi) {
  EVT VT = N->getValueType(0);
  SDValue Val = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  SDLoc DL(N);

  // Fallback to VP_STRIDED_STORE to stack followed by VP_LOAD.
  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
      Alignment);
  MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
      Alignment);

  // The first input lane lands at byte offset (EVL - 1) * EltWidth and the
  // stride walks backwards from there, so lane i of the input is stored at
  // slot EVL - 1 - i.
  unsigned EltWidth = VT.getScalarSizeInBits() / 8;
  SDValue NumElemMinus1 =
      DAG.getNode(ISD::SUB, DL, PtrVT, DAG.getZExtOrTrunc(EVL, DL, PtrVT),
                  DAG.getConstant(1, DL, PtrVT));
  SDValue StartOffset = DAG.getNode(ISD::MUL, DL, PtrVT, NumElemMinus1,
                                    DAG.getConstant(EltWidth, DL, PtrVT));
  SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, StartOffset);
  SDValue Stride = DAG.getConstant(-(int64_t)EltWidth, DL, PtrVT);

  // Store every lane (all-true mask); the original mask is applied when the
  // reversed value is reloaded.
  SDValue TrueMask = DAG.getBoolConstant(true, DL, Mask.getValueType(), VT);
  SDValue Store = DAG.getStridedStoreVP(DAG.getEntryNode(), DL, Val, StorePtr,
                                        DAG.getPOISON(PtrVT), Stride, TrueMask,
                                        EVL, MemVT, StoreMMO, ISD::UNINDEXED);

  SDValue Load = DAG.getLoadVP(VT, DL, Store, StackPtr, Mask, EVL, LoadMMO);

  std::tie(Lo, Hi) = DAG.SplitVector(Load, DL);
}
3539
// Split the result of a VP_SPLICE via a stack temporary: store V1 (EVL1
// lanes) and V2 (EVL2 lanes) back to back into a double-length slot, reload
// the spliced window selected by Imm, and split the reloaded vector.
void DAGTypeLegalizer::SplitVecRes_VP_SPLICE(SDNode *N, SDValue &Lo,
                                             SDValue &Hi) {
  EVT VT = N->getValueType(0);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(N->getOperand(2))->getSExtValue();
  SDValue Mask = N->getOperand(3);
  SDValue EVL1 = N->getOperand(4);
  SDValue EVL2 = N->getOperand(5);
  SDLoc DL(N);

  // Since EVL2 is considered the real VL it gets promoted during
  // SelectionDAGBuilder. Promote EVL1 here if needed.
  if (getTypeAction(EVL1.getValueType()) == TargetLowering::TypePromoteInteger)
    EVL1 = ZExtPromotedInteger(EVL1);

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // Stack slot holding both source vectors back to back.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
      Alignment);
  MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
      Alignment);

  // V2 starts immediately after the EVL1 elements of V1.
  SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, EVL1);
  SDValue PoisonPtr = DAG.getPOISON(PtrVT);

  // Both stores use an all-true mask; the original mask applies on reload.
  SDValue TrueMask = DAG.getBoolConstant(true, DL, Mask.getValueType(), VT);
  SDValue StoreV1 =
      DAG.getStoreVP(DAG.getEntryNode(), DL, V1, StackPtr, PoisonPtr, TrueMask,
                     EVL1, V1.getValueType(), StoreMMO, ISD::UNINDEXED);

      DAG.getStoreVP(StoreV1, DL, V2, StackPtr2, PoisonPtr, TrueMask, EVL2,
                     V2.getValueType(), StoreMMO, ISD::UNINDEXED);

  SDValue Load;
  if (Imm >= 0) {
    // Non-negative splice index: the result begins Imm elements into V1.
    StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VT, N->getOperand(2));
    Load = DAG.getLoadVP(VT, DL, StoreV2, StackPtr, Mask, EVL2, LoadMMO);
  } else {
    // Negative index: the result begins -Imm elements before the start of V2.
    uint64_t TrailingElts = -Imm;
    unsigned EltWidth = VT.getScalarSizeInBits() / 8;
    SDValue TrailingBytes = DAG.getConstant(TrailingElts * EltWidth, DL, PtrVT);

    // Make sure TrailingBytes doesn't exceed the size of vec1.
    SDValue OffsetToV2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, StackPtr);
    TrailingBytes =
        DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, OffsetToV2);

    // Calculate the start address of the spliced result.
    StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
    Load = DAG.getLoadVP(VT, DL, StoreV2, StackPtr2, Mask, EVL2, LoadMMO);
  }

  // Hand back the two halves of the reloaded result.
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Load,
                   DAG.getVectorIdxConstant(0, DL));
  Hi =
      DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Load,
                  DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
}
3612
3613void DAGTypeLegalizer::SplitVecRes_PARTIAL_REDUCE_MLA(SDNode *N, SDValue &Lo,
3614 SDValue &Hi) {
3615 SDLoc DL(N);
3616 SDValue Acc = N->getOperand(0);
3617 SDValue Input1 = N->getOperand(1);
3618 SDValue Input2 = N->getOperand(2);
3619
3620 SDValue AccLo, AccHi;
3621 GetSplitVector(Acc, AccLo, AccHi);
3622 unsigned Opcode = N->getOpcode();
3623
3624 // If the input types don't need splitting, just accumulate into the
3625 // low part of the accumulator.
3626 if (getTypeAction(Input1.getValueType()) != TargetLowering::TypeSplitVector) {
3627 Lo = DAG.getNode(Opcode, DL, AccLo.getValueType(), AccLo, Input1, Input2);
3628 Hi = AccHi;
3629 return;
3630 }
3631
3632 SDValue Input1Lo, Input1Hi;
3633 SDValue Input2Lo, Input2Hi;
3634 GetSplitVector(Input1, Input1Lo, Input1Hi);
3635 GetSplitVector(Input2, Input2Lo, Input2Hi);
3636 EVT ResultVT = AccLo.getValueType();
3637
3638 Lo = DAG.getNode(Opcode, DL, ResultVT, AccLo, Input1Lo, Input2Lo);
3639 Hi = DAG.getNode(Opcode, DL, ResultVT, AccHi, Input1Hi, Input2Hi);
3640}
3641
3642void DAGTypeLegalizer::SplitVecRes_GET_ACTIVE_LANE_MASK(SDNode *N, SDValue &Lo,
3643 SDValue &Hi) {
3644 SDLoc DL(N);
3645 SDValue Op0 = N->getOperand(0);
3646 SDValue Op1 = N->getOperand(1);
3647 EVT OpVT = Op0.getValueType();
3648
3649 EVT LoVT, HiVT;
3650 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
3651
3652 Lo = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, LoVT, Op0, Op1);
3653 SDValue LoElts = DAG.getElementCount(DL, OpVT, LoVT.getVectorElementCount());
3654 SDValue HiStartVal = DAG.getNode(ISD::UADDSAT, DL, OpVT, Op0, LoElts);
3655 Hi = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, HiVT, HiStartVal, Op1);
3656}
3657
3658void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) {
3659 unsigned Factor = N->getNumOperands();
3660
3661 SmallVector<SDValue, 8> Ops(Factor * 2);
3662 for (unsigned i = 0; i != Factor; ++i) {
3663 SDValue OpLo, OpHi;
3664 GetSplitVector(N->getOperand(i), OpLo, OpHi);
3665 Ops[i * 2] = OpLo;
3666 Ops[i * 2 + 1] = OpHi;
3667 }
3668
3669 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
3670
3671 SDLoc DL(N);
3672 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
3673 ArrayRef(Ops).slice(0, Factor));
3674 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
3675 ArrayRef(Ops).slice(Factor, Factor));
3676
3677 for (unsigned i = 0; i != Factor; ++i)
3678 SetSplitVector(SDValue(N, i), ResLo.getValue(i), ResHi.getValue(i));
3679}
3680
3681void DAGTypeLegalizer::SplitVecRes_VECTOR_INTERLEAVE(SDNode *N) {
3682 unsigned Factor = N->getNumOperands();
3683
3684 SmallVector<SDValue, 8> Ops(Factor * 2);
3685 for (unsigned i = 0; i != Factor; ++i) {
3686 SDValue OpLo, OpHi;
3687 GetSplitVector(N->getOperand(i), OpLo, OpHi);
3688 Ops[i] = OpLo;
3689 Ops[i + Factor] = OpHi;
3690 }
3691
3692 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
3693
3694 SDLoc DL(N);
3695 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
3696 ArrayRef(Ops).slice(0, Factor)),
3697 DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
3698 ArrayRef(Ops).slice(Factor, Factor))};
3699
3700 for (unsigned i = 0; i != Factor; ++i) {
3701 unsigned IdxLo = 2 * i;
3702 unsigned IdxHi = 2 * i + 1;
3703 SetSplitVector(SDValue(N, i), Res[IdxLo / Factor].getValue(IdxLo % Factor),
3704 Res[IdxHi / Factor].getValue(IdxHi % Factor));
3705 }
3706}
3707
3708//===----------------------------------------------------------------------===//
3709// Operand Vector Splitting
3710//===----------------------------------------------------------------------===//
3711
3712/// This method is called when the specified operand of the specified node is
3713/// found to need vector splitting. At this point, all of the result types of
3714/// the node are known to be legal, but other operands of the node may need
3715/// legalization as well as the specified one.
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
  LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG));
  SDValue Res = SDValue();

  // See if the target wants to custom split this node.
  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
    return false;

  // Dispatch on the opcode. Each helper either returns a replacement value
  // for result 0 of N, returns N itself (updated in place), or registers the
  // results itself and returns a null SDValue.
  switch (N->getOpcode()) {
  default:
#ifndef NDEBUG
    dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
    N->dump(&DAG);
    dbgs() << "\n";
#endif
    report_fatal_error("Do not know how to split this operator's "
                       "operand!\n");

  case ISD::VP_SETCC:
  case ISD::STRICT_FSETCC:
  case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
  case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
  case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
  case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break;
  case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
  case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
    Res = SplitVecOp_VECTOR_FIND_LAST_ACTIVE(N);
    break;
  case ISD::VP_TRUNCATE:
  case ISD::TRUNCATE:
    Res = SplitVecOp_TruncateHelper(N);
    break;
  case ISD::VP_FP_ROUND:
  case ISD::FP_ROUND:
    Res = SplitVecOp_FP_ROUND(N);
    break;
  case ISD::FCOPYSIGN: Res = SplitVecOp_FPOpDifferentTypes(N); break;
  case ISD::STORE:
    Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
    break;
  case ISD::VP_STORE:
    Res = SplitVecOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo);
    break;
  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
    Res = SplitVecOp_VP_STRIDED_STORE(cast<VPStridedStoreSDNode>(N), OpNo);
    break;
  case ISD::MSTORE:
    Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
    break;
  case ISD::MSCATTER:
  case ISD::VP_SCATTER:
    Res = SplitVecOp_Scatter(cast<MemSDNode>(N), OpNo);
    break;
  case ISD::MGATHER:
  case ISD::VP_GATHER:
    Res = SplitVecOp_Gather(cast<MemSDNode>(N), OpNo);
    break;
  case ISD::VSELECT:
    Res = SplitVecOp_VSELECT(N, OpNo);
    break;
    Res = SplitVecOp_VECTOR_COMPRESS(N, OpNo);
    break;
  // int-to-fp conversions whose result element type is narrower than the
  // operand's are handled like truncates; otherwise as plain unary ops.
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::VP_SINT_TO_FP:
  case ISD::VP_UINT_TO_FP:
    if (N->getValueType(0).bitsLT(
            N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
      Res = SplitVecOp_TruncateHelper(N);
    else
      Res = SplitVecOp_UnaryOp(N);
    break;
    Res = SplitVecOp_FP_TO_XINT_SAT(N);
    break;
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::VP_FP_TO_SINT:
  case ISD::VP_FP_TO_UINT:
  case ISD::FP_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::FTRUNC:
  case ISD::LROUND:
  case ISD::LLROUND:
  case ISD::LRINT:
  case ISD::LLRINT:
    Res = SplitVecOp_UnaryOp(N);
    break;
  case ISD::FLDEXP:
    Res = SplitVecOp_FPOpDifferentTypes(N);
    break;

  case ISD::SCMP:
  case ISD::UCMP:
    Res = SplitVecOp_CMP(N);
    break;

  case ISD::FAKE_USE:
    Res = SplitVecOp_FAKE_USE(N);
    break;
    Res = SplitVecOp_ExtVecInRegOp(N);
    break;

  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
    Res = SplitVecOp_VECREDUCE(N, OpNo);
    break;
    Res = SplitVecOp_VECREDUCE_SEQ(N);
    break;
  case ISD::VP_REDUCE_FADD:
  case ISD::VP_REDUCE_SEQ_FADD:
  case ISD::VP_REDUCE_FMUL:
  case ISD::VP_REDUCE_SEQ_FMUL:
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_MUL:
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_UMIN:
  case ISD::VP_REDUCE_FMAX:
  case ISD::VP_REDUCE_FMIN:
  case ISD::VP_REDUCE_FMAXIMUM:
  case ISD::VP_REDUCE_FMINIMUM:
    Res = SplitVecOp_VP_REDUCE(N, OpNo);
    break;
  case ISD::CTTZ_ELTS:
    Res = SplitVecOp_CttzElts(N);
    break;
  case ISD::VP_CTTZ_ELTS:
  case ISD::VP_CTTZ_ELTS_ZERO_POISON:
    Res = SplitVecOp_VP_CttzElements(N);
    break;
    Res = SplitVecOp_VECTOR_HISTOGRAM(N);
    break;
    Res = SplitVecOp_PARTIAL_REDUCE_MLA(N);
    break;
  }

  // If the result is null, the sub-method took care of registering results etc.
  if (!Res.getNode()) return false;

  // If the result is N, the sub-method updated N in place. Tell the legalizer
  // core about this.
  if (Res.getNode() == N)
    return true;

  // Strict FP nodes carry a chain as a second result; everything else here
  // must produce exactly one value matching result 0's type.
  if (N->isStrictFPOpcode())
    assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
           "Invalid operand expansion");
  else
    assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
           "Invalid operand expansion");

  ReplaceValueWith(SDValue(N, 0), Res);
  return false;
}
3912
3913SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_FIND_LAST_ACTIVE(SDNode *N) {
3914 SDLoc DL(N);
3915
3916 SDValue LoMask, HiMask;
3917 GetSplitVector(N->getOperand(0), LoMask, HiMask);
3918
3919 EVT VT = N->getValueType(0);
3920 EVT SplitVT = LoMask.getValueType();
3921 ElementCount SplitEC = SplitVT.getVectorElementCount();
3922
3923 // Find the last active in both the low and the high masks.
3924 SDValue LoFind = DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, VT, LoMask);
3925 SDValue HiFind = DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, VT, HiMask);
3926
3927 // Check if any lane is active in the high mask.
3928 // FIXME: This would not be necessary if VECTOR_FIND_LAST_ACTIVE returned a
3929 // sentinel value for "none active".
3930 SDValue AnyHiActive = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, HiMask);
3931 SDValue Cond = DAG.getBoolExtOrTrunc(AnyHiActive, DL,
3932 getSetCCResultType(MVT::i1), MVT::i1);
3933
3934 // Return: AnyHiActive ? (HiFind + SplitEC) : LoFind;
3935 return DAG.getNode(ISD::SELECT, DL, VT, Cond,
3936 DAG.getNode(ISD::ADD, DL, VT, HiFind,
3937 DAG.getElementCount(DL, VT, SplitEC)),
3938 LoFind);
3939}
3940
3941SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
3942 // The only possibility for an illegal operand is the mask, since result type
3943 // legalization would have handled this node already otherwise.
3944 assert(OpNo == 0 && "Illegal operand must be mask");
3945
3946 SDValue Mask = N->getOperand(0);
3947 SDValue Src0 = N->getOperand(1);
3948 SDValue Src1 = N->getOperand(2);
3949 EVT Src0VT = Src0.getValueType();
3950 SDLoc DL(N);
3951 assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?");
3952
3953 SDValue Lo, Hi;
3954 GetSplitVector(N->getOperand(0), Lo, Hi);
3955 assert(Lo.getValueType() == Hi.getValueType() &&
3956 "Lo and Hi have differing types");
3957
3958 EVT LoOpVT, HiOpVT;
3959 std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
3960 assert(LoOpVT == HiOpVT && "Asymmetric vector split?");
3961
3962 SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask;
3963 std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
3964 std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
3965 std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
3966
3967 SDValue LoSelect =
3968 DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
3969 SDValue HiSelect =
3970 DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
3971
3972 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
3973}
3974
3975SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_COMPRESS(SDNode *N, unsigned OpNo) {
3976 // The only possibility for an illegal operand is the mask, since result type
3977 // legalization would have handled this node already otherwise.
3978 assert(OpNo == 1 && "Illegal operand must be mask");
3979
3980 // To split the mask, we need to split the result type too, so we can just
3981 // reuse that logic here.
3982 SDValue Lo, Hi;
3983 SplitVecRes_VECTOR_COMPRESS(N, Lo, Hi);
3984
3985 EVT VecVT = N->getValueType(0);
3986 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VecVT, Lo, Hi);
3987}
3988
3989SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
3990 EVT ResVT = N->getValueType(0);
3991 SDValue Lo, Hi;
3992 SDLoc dl(N);
3993
3994 SDValue VecOp = N->getOperand(OpNo);
3995 EVT VecVT = VecOp.getValueType();
3996 assert(VecVT.isVector() && "Can only split reduce vector operand");
3997 GetSplitVector(VecOp, Lo, Hi);
3998 EVT LoOpVT, HiOpVT;
3999 std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
4000
4001 // Use the appropriate scalar instruction on the split subvectors before
4002 // reducing the now partially reduced smaller vector.
4003 unsigned CombineOpc = ISD::getVecReduceBaseOpcode(N->getOpcode());
4004 SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags());
4005 return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags());
4006}
4007
4008SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE_SEQ(SDNode *N) {
4009 EVT ResVT = N->getValueType(0);
4010 SDValue Lo, Hi;
4011 SDLoc dl(N);
4012
4013 SDValue AccOp = N->getOperand(0);
4014 SDValue VecOp = N->getOperand(1);
4015 SDNodeFlags Flags = N->getFlags();
4016
4017 EVT VecVT = VecOp.getValueType();
4018 assert(VecVT.isVector() && "Can only split reduce vector operand");
4019 GetSplitVector(VecOp, Lo, Hi);
4020 EVT LoOpVT, HiOpVT;
4021 std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
4022
4023 // Reduce low half.
4024 SDValue Partial = DAG.getNode(N->getOpcode(), dl, ResVT, AccOp, Lo, Flags);
4025
4026 // Reduce high half, using low half result as initial value.
4027 return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, Hi, Flags);
4028}
4029
4030SDValue DAGTypeLegalizer::SplitVecOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
4031 assert(N->isVPOpcode() && "Expected VP opcode");
4032 assert(OpNo == 1 && "Can only split reduce vector operand");
4033
4034 unsigned Opc = N->getOpcode();
4035 EVT ResVT = N->getValueType(0);
4036 SDValue Lo, Hi;
4037 SDLoc dl(N);
4038
4039 SDValue VecOp = N->getOperand(OpNo);
4040 EVT VecVT = VecOp.getValueType();
4041 assert(VecVT.isVector() && "Can only split reduce vector operand");
4042 GetSplitVector(VecOp, Lo, Hi);
4043
4044 SDValue MaskLo, MaskHi;
4045 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2));
4046
4047 SDValue EVLLo, EVLHi;
4048 std::tie(EVLLo, EVLHi) = DAG.SplitEVL(N->getOperand(3), VecVT, dl);
4049
4050 const SDNodeFlags Flags = N->getFlags();
4051
4052 SDValue ResLo =
4053 DAG.getNode(Opc, dl, ResVT, {N->getOperand(0), Lo, MaskLo, EVLLo}, Flags);
4054 return DAG.getNode(Opc, dl, ResVT, {ResLo, Hi, MaskHi, EVLHi}, Flags);
4055}
4056
4057SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
4058 // The result has a legal vector type, but the input needs splitting.
4059 EVT ResVT = N->getValueType(0);
4060 SDValue Lo, Hi;
4061 SDLoc dl(N);
4062 GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
4063 EVT InVT = Lo.getValueType();
4064
4065 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
4066 InVT.getVectorElementCount());
4067
4068 if (N->isStrictFPOpcode()) {
4069 Lo = DAG.getNode(N->getOpcode(), dl, {OutVT, MVT::Other},
4070 {N->getOperand(0), Lo});
4071 Hi = DAG.getNode(N->getOpcode(), dl, {OutVT, MVT::Other},
4072 {N->getOperand(0), Hi});
4073
4074 // Build a factor node to remember that this operation is independent
4075 // of the other one.
4076 SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
4077 Hi.getValue(1));
4078
4079 // Legalize the chain result - switch anything that used the old chain to
4080 // use the new one.
4081 ReplaceValueWith(SDValue(N, 1), Ch);
4082 } else if (N->getNumOperands() == 3) {
4083 assert(N->isVPOpcode() && "Expected VP opcode");
4084 SDValue MaskLo, MaskHi, EVLLo, EVLHi;
4085 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
4086 std::tie(EVLLo, EVLHi) =
4087 DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
4088 Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo, MaskLo, EVLLo);
4089 Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi, MaskHi, EVLHi);
4090 } else {
4091 Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
4092 Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
4093 }
4094
4095 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
4096}
4097
4098// Split a FAKE_USE use of a vector into FAKE_USEs of hi and lo part.
4099SDValue DAGTypeLegalizer::SplitVecOp_FAKE_USE(SDNode *N) {
4100 SDValue Lo, Hi;
4101 GetSplitVector(N->getOperand(1), Lo, Hi);
4102 SDValue Chain =
4103 DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Lo);
4104 return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, Chain, Hi);
4105}
4106
4107SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
4108 // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
4109 // end up being split all the way down to individual components. Convert the
4110 // split pieces into integers and reassemble.
4111 EVT ResVT = N->getValueType(0);
4112 SDValue Lo, Hi;
4113 GetSplitVector(N->getOperand(0), Lo, Hi);
4114 SDLoc dl(N);
4115
4116 if (ResVT.isScalableVector()) {
4117 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(ResVT);
4118 Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
4119 Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
4120 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
4121 }
4122
4123 Lo = BitConvertToInteger(Lo);
4124 Hi = BitConvertToInteger(Hi);
4125
4126 if (DAG.getDataLayout().isBigEndian())
4127 std::swap(Lo, Hi);
4128
4129 return DAG.getNode(ISD::BITCAST, dl, ResVT, JoinIntegers(Lo, Hi));
4130}
4131
4132SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N,
4133 unsigned OpNo) {
4134 assert(OpNo == 1 && "Invalid OpNo; can only split SubVec.");
4135 // We know that the result type is legal.
4136 EVT ResVT = N->getValueType(0);
4137
4138 SDValue Vec = N->getOperand(0);
4139 SDValue SubVec = N->getOperand(1);
4140 SDValue Idx = N->getOperand(2);
4141 SDLoc dl(N);
4142
4143 SDValue Lo, Hi;
4144 GetSplitVector(SubVec, Lo, Hi);
4145
4146 uint64_t IdxVal = Idx->getAsZExtVal();
4147 uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
4148
4149 SDValue FirstInsertion =
4150 DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Lo, Idx);
4151 SDValue SecondInsertion =
4152 DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, FirstInsertion, Hi,
4153 DAG.getVectorIdxConstant(IdxVal + LoElts, dl));
4154
4155 return SecondInsertion;
4156}
4157
// Split the source operand of an EXTRACT_SUBVECTOR. Tries, in order: a plain
// extract from one half, an element-wise blend across the split point, a
// shuffle-based extract, and finally a stack spill + reload.
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
  // We know that the extracted result type is legal.
  EVT SubVT = N->getValueType(0);
  SDValue Idx = N->getOperand(1);
  SDLoc dl(N);
  SDValue Lo, Hi;

  GetSplitVector(N->getOperand(0), Lo, Hi);

  ElementCount LoElts = Lo.getValueType().getVectorElementCount();
  // Note: For scalable vectors, the index is scaled by vscale.
  ElementCount IdxVal =
  uint64_t IdxValMin = IdxVal.getKnownMinValue();

  EVT SrcVT = N->getOperand(0).getValueType();
  ElementCount NumResultElts = SubVT.getVectorElementCount();

  // If the extracted elements are all in the low half, do a simple extract.
  if (ElementCount::isKnownLE(IdxVal + NumResultElts, LoElts))
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);

  unsigned LoEltsMin = LoElts.getKnownMinValue();
  if (IdxValMin < LoEltsMin && SubVT.isFixedLengthVector() &&
      SrcVT.isFixedLengthVector()) {
    // Extracted subvector crosses vector split, so we need to blend the two
    // halves.
    // TODO: May be able to emit partial extract_subvector.
    Elts.reserve(NumResultElts.getFixedValue());

    // This is not valid for scalable vectors. If SubVT is scalable, this is the
    // same as unrolling a scalable dimension (invalid). If ScrVT is scalable,
    // `Lo[LoEltsMin]` may not be the last element of `Lo`.
    DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxValMin,
                              /*Count=*/LoEltsMin - IdxValMin);
    DAG.ExtractVectorElements(Hi, Elts, /*Start=*/0,
                              /*Count=*/SubVT.getVectorNumElements() -
                                  Elts.size());
    return DAG.getBuildVector(SubVT, dl, Elts);
  }

  if (SubVT.isScalableVector() == SrcVT.isScalableVector()) {
    // The extract must start inside the high half here (the low-half-only
    // case returned above).
    ElementCount ExtractIdx = IdxVal - LoElts;
    if (ExtractIdx.isKnownMultipleOf(NumResultElts))
      return DAG.getExtractSubvector(dl, SubVT, Hi,
                                     ExtractIdx.getKnownMinValue());

    EVT HiVT = Hi.getValueType();
    assert(HiVT.isFixedLengthVector() &&
           "Only fixed-vector extracts are supported in this case");

    // We cannot create an extract_subvector that isn't a multiple of the
    // result size, which may go out of bounds for the last elements. Shuffle
    // the desired elements down to 0 and do a simple 0 extract.
    SmallVector<int, 8> Mask(HiVT.getVectorNumElements(), -1);
    for (int I = 0; I != int(NumResultElts.getFixedValue()); ++I)
      Mask[I] = int(ExtractIdx.getFixedValue()) + I;

    SDValue Shuffle =
        DAG.getVectorShuffle(HiVT, dl, Hi, DAG.getPOISON(HiVT), Mask);
    return DAG.getExtractSubvector(dl, SubVT, Shuffle, 0);
  }

  // After this point the DAG node only permits extracting fixed-width
  // subvectors from scalable vectors.
  assert(SubVT.isFixedLengthVector() &&
         "Extracting scalable subvector from fixed-width unsupported");

  // If the element type is i1 and we're not promoting the result, then we may
  // end up loading the wrong data since the bits are packed tightly into
  // bytes. For example, if we extract a v4i1 (legal) from a nxv4i1 (legal)
  // type at index 4, then we will load a byte starting at index 0.
  if (SubVT.getScalarType() == MVT::i1)
    report_fatal_error("Don't know how to extract fixed-width predicate "
                       "subvector from a scalable predicate vector");

  // Spill the vector to the stack. We should use the alignment for
  // the smallest part.
  SDValue Vec = N->getOperand(0);
  EVT VecVT = Vec.getValueType();
  Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
      DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
                               SmallestAlign);

  // Extract the subvector by loading the correct part.
  StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVT, Idx);

  return DAG.getLoad(
      SubVT, dl, Store, StackPtr,
      MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
}
4256
// Split the source operand of an EXTRACT_VECTOR_ELT. A constant index lets
// the extract be redirected to one half; otherwise the vector is spilled to
// the stack and the element is loaded back.
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
  SDValue Vec = N->getOperand(0);
  SDValue Idx = N->getOperand(1);
  EVT VecVT = Vec.getValueType();

  // With a constant index, extract directly from whichever half contains the
  // element.
  if (const ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Idx)) {
    uint64_t IdxVal = Index->getZExtValue();

    SDValue Lo, Hi;
    GetSplitVector(Vec, Lo, Hi);

    uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();

    // Redirecting into the high half is only done for fixed-length vectors;
    // for scalable vectors an index >= the low half's minimum count falls
    // through to the generic expansion below.
    if (IdxVal < LoElts)
      return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
    else if (!Vec.getValueType().isScalableVector())
      return SDValue(DAG.UpdateNodeOperands(N, Hi,
                                    DAG.getConstant(IdxVal - LoElts, SDLoc(N),
                                                    Idx.getValueType())), 0);
  }

  // See if the target wants to custom expand this node.
  if (CustomLowerNode(N, N->getValueType(0), true))
    return SDValue();

  // Make the vector elements byte-addressable if they aren't already.
  SDLoc dl(N);
  EVT EltVT = VecVT.getVectorElementType();
  if (!EltVT.isByteSized()) {
    EltVT = EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext());
    VecVT = VecVT.changeElementType(*DAG.getContext(), EltVT);
    Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
    SDValue NewExtract =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, Idx);
    return DAG.getAnyExtOrTrunc(NewExtract, dl, N->getValueType(0));
  }

  // Store the vector to the stack.
  // In cases where the vector is illegal it will be broken down into parts
  // and stored in parts - we should use the alignment for the smallest part.
  Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
      DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
                               SmallestAlign);

  // Load back the required element.
  StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);

  // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
  // type, leaving the high bits undefined. But it can't truncate.
  assert(N->getValueType(0).bitsGE(EltVT) && "Illegal EXTRACT_VECTOR_ELT.");

  return DAG.getExtLoad(
      ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
      MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT,
      commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
}
4318
4319SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
4320 SDValue Lo, Hi;
4321
4322 // *_EXTEND_VECTOR_INREG only reference the lower half of the input, so
4323 // splitting the result has the same effect as splitting the input operand.
4324 SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
4325
4326 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
4327}
4328
4329SDValue DAGTypeLegalizer::SplitVecOp_Gather(MemSDNode *N, unsigned OpNo) {
4330 (void)OpNo;
4331 SDValue Lo, Hi;
4332 SplitVecRes_Gather(N, Lo, Hi);
4333
4334 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, N, N->getValueType(0), Lo, Hi);
4335 ReplaceValueWith(SDValue(N, 0), Res);
4336 return SDValue();
4337}
4338
// Split the stored value (and mask/EVL) of a VP_STORE into two stores, the
// second addressed past the bytes covered by the first. Returns the combined
// chain, or just the low store's chain when the high part stores nothing.
SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) {
  assert(N->isUnindexed() && "Indexed vp_store of vector?");
  SDValue Ch = N->getChain();
  SDValue Ptr = N->getBasePtr();
  SDValue Offset = N->getOffset();
  assert(Offset.isUndef() && "Unexpected VP store offset");
  SDValue Mask = N->getMask();
  SDValue EVL = N->getVectorLength();
  SDValue Data = N->getValue();
  Align Alignment = N->getBaseAlign();
  SDLoc DL(N);

  SDValue DataLo, DataHi;
  if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
    // Split Data operand
    GetSplitVector(Data, DataLo, DataHi);
  else
    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  // Split Mask operand
  SDValue MaskLo, MaskHi;
  if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
  } else {
    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
      GetSplitVector(Mask, MaskLo, MaskHi);
    else
      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
  }

  EVT MemoryVT = N->getMemoryVT();
  EVT LoMemVT, HiMemVT;
  // HiIsEmpty is set when the high memory type covers zero bytes.
  bool HiIsEmpty = false;
  std::tie(LoMemVT, HiMemVT) =
      DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);

  // Split EVL
  SDValue EVLLo, EVLHi;
  std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, Data.getValueType(), DL);

  SDValue Lo, Hi;
  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      N->getPointerInfo(), MachineMemOperand::MOStore,
      LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
      N->getRanges());

  Lo = DAG.getStoreVP(Ch, DL, DataLo, Ptr, Offset, MaskLo, EVLLo, LoMemVT, MMO,
                      N->getAddressingMode(), N->isTruncatingStore(),
                      N->isCompressingStore());

  // If the hi vp_store has zero storage size, only the lo vp_store is needed.
  if (HiIsEmpty)
    return Lo;

  // Advance the pointer past the bytes written by the low store.
  Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
                                   N->isCompressingStore());

  MachinePointerInfo MPI;
  if (LoMemVT.isScalableVector()) {
    // The low part's byte size isn't a compile-time constant, so the offset
    // can't be folded into the pointer info; weaken the alignment instead.
    Alignment = commonAlignment(Alignment,
                                LoMemVT.getSizeInBits().getKnownMinValue() / 8);
    MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
  } else
    MPI = N->getPointerInfo().getWithOffset(
        LoMemVT.getStoreSize().getFixedValue());

  MMO = DAG.getMachineFunction().getMachineMemOperand(
      Alignment, N->getAAInfo(), N->getRanges());

  Hi = DAG.getStoreVP(Ch, DL, DataHi, Ptr, Offset, MaskHi, EVLHi, HiMemVT, MMO,
                      N->getAddressingMode(), N->isTruncatingStore(),
                      N->isCompressingStore());

  // Build a factor node to remember that this store is independent of the
  // other one.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
4417
4418SDValue DAGTypeLegalizer::SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N,
4419 unsigned OpNo) {
4420 assert(N->isUnindexed() && "Indexed vp_strided_store of a vector?");
4421 assert(N->getOffset().isUndef() && "Unexpected VP strided store offset");
4422
4423 SDLoc DL(N);
4424
4425 SDValue Data = N->getValue();
4426 SDValue LoData, HiData;
4427 if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
4428 GetSplitVector(Data, LoData, HiData);
4429 else
4430 std::tie(LoData, HiData) = DAG.SplitVector(Data, DL);
4431
4432 EVT LoMemVT, HiMemVT;
4433 bool HiIsEmpty = false;
4434 std::tie(LoMemVT, HiMemVT) = DAG.GetDependentSplitDestVTs(
4435 N->getMemoryVT(), LoData.getValueType(), &HiIsEmpty);
4436
4437 SDValue Mask = N->getMask();
4438 SDValue LoMask, HiMask;
4439 if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC)
4440 SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask);
4441 else if (getTypeAction(Mask.getValueType()) ==
4443 GetSplitVector(Mask, LoMask, HiMask);
4444 else
4445 std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
4446
4447 SDValue LoEVL, HiEVL;
4448 std::tie(LoEVL, HiEVL) =
4449 DAG.SplitEVL(N->getVectorLength(), Data.getValueType(), DL);
4450
4451 // Generate the low vp_strided_store
4452 SDValue Lo = DAG.getStridedStoreVP(
4453 N->getChain(), DL, LoData, N->getBasePtr(), N->getOffset(),
4454 N->getStride(), LoMask, LoEVL, LoMemVT, N->getMemOperand(),
4455 N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore());
4456
4457 // If the high vp_strided_store has zero storage size, only the low
4458 // vp_strided_store is needed.
4459 if (HiIsEmpty)
4460 return Lo;
4461
4462 // Generate the high vp_strided_store.
4463 // To calculate the high base address, we need to sum to the low base
4464 // address stride number of bytes for each element already stored by low,
4465 // that is: Ptr = Ptr + (LoEVL * Stride)
4466 EVT PtrVT = N->getBasePtr().getValueType();
4468 DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL,
4469 DAG.getSExtOrTrunc(N->getStride(), DL, PtrVT));
4470 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, N->getBasePtr(), Increment);
4471
4472 Align Alignment = N->getBaseAlign();
4473 if (LoMemVT.isScalableVector())
4474 Alignment = commonAlignment(Alignment,
4475 LoMemVT.getSizeInBits().getKnownMinValue() / 8);
4476
4477 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4478 MachinePointerInfo(N->getPointerInfo().getAddrSpace()),
4480 Alignment, N->getAAInfo(), N->getRanges());
4481
4482 SDValue Hi = DAG.getStridedStoreVP(
4483 N->getChain(), DL, HiData, Ptr, N->getOffset(), N->getStride(), HiMask,
4484 HiEVL, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(),
4485 N->isCompressingStore());
4486
4487 // Build a factor node to remember that this store is independent of the
4488 // other one.
4489 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
4490}
4491
4492SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
4493 unsigned OpNo) {
4494 assert(N->isUnindexed() && "Indexed masked store of vector?");
4495 SDValue Ch = N->getChain();
4496 SDValue Ptr = N->getBasePtr();
4497 SDValue Offset = N->getOffset();
4498 assert(Offset.isUndef() && "Unexpected indexed masked store offset");
4499 SDValue Mask = N->getMask();
4500 SDValue Data = N->getValue();
4501 Align Alignment = N->getBaseAlign();
4502 SDLoc DL(N);
4503
4504 SDValue DataLo, DataHi;
4505 if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
4506 // Split Data operand
4507 GetSplitVector(Data, DataLo, DataHi);
4508 else
4509 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
4510
4511 // Split Mask operand
4512 SDValue MaskLo, MaskHi;
4513 if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
4514 SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
4515 } else {
4516 if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
4517 GetSplitVector(Mask, MaskLo, MaskHi);
4518 else
4519 std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
4520 }
4521
4522 EVT MemoryVT = N->getMemoryVT();
4523 EVT LoMemVT, HiMemVT;
4524 bool HiIsEmpty = false;
4525 std::tie(LoMemVT, HiMemVT) =
4526 DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
4527
4528 SDValue Lo, Hi, Res;
4529 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4530 N->getPointerInfo(), MachineMemOperand::MOStore,
4531 LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
4532 N->getRanges());
4533
4534 Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
4535 N->getAddressingMode(), N->isTruncatingStore(),
4536 N->isCompressingStore());
4537
4538 if (HiIsEmpty) {
4539 // The hi masked store has zero storage size.
4540 // Only the lo masked store is needed.
4541 Res = Lo;
4542 } else {
4543
4544 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
4545 N->isCompressingStore());
4546
4547 MachinePointerInfo MPI;
4548 if (LoMemVT.isScalableVector()) {
4549 Alignment = commonAlignment(
4550 Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8);
4551 MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
4552 } else
4553 MPI = N->getPointerInfo().getWithOffset(
4554 LoMemVT.getStoreSize().getFixedValue());
4555
4556 MMO = DAG.getMachineFunction().getMachineMemOperand(
4558 Alignment, N->getAAInfo(), N->getRanges());
4559
4560 Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
4561 N->getAddressingMode(), N->isTruncatingStore(),
4562 N->isCompressingStore());
4563
4564 // Build a factor node to remember that this store is independent of the
4565 // other one.
4566 Res = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
4567 }
4568
4569 return Res;
4570}
4571
4572SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
4573 SDValue Ch = N->getChain();
4574 SDValue Ptr = N->getBasePtr();
4575 EVT MemoryVT = N->getMemoryVT();
4576 Align Alignment = N->getBaseAlign();
4577 SDLoc DL(N);
4578 struct Operands {
4579 SDValue Mask;
4580 SDValue Index;
4581 SDValue Scale;
4582 SDValue Data;
4583 } Ops = [&]() -> Operands {
4584 if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
4585 return {MSC->getMask(), MSC->getIndex(), MSC->getScale(),
4586 MSC->getValue()};
4587 }
4588 auto *VPSC = cast<VPScatterSDNode>(N);
4589 return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale(),
4590 VPSC->getValue()};
4591 }();
4592 // Split all operands
4593
4594 EVT LoMemVT, HiMemVT;
4595 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
4596
4597 SDValue DataLo, DataHi;
4598 if (getTypeAction(Ops.Data.getValueType()) == TargetLowering::TypeSplitVector)
4599 // Split Data operand
4600 GetSplitVector(Ops.Data, DataLo, DataHi);
4601 else
4602 std::tie(DataLo, DataHi) = DAG.SplitVector(Ops.Data, DL);
4603
4604 // Split Mask operand
4605 SDValue MaskLo, MaskHi;
4606 if (OpNo == 1 && Ops.Mask.getOpcode() == ISD::SETCC) {
4607 SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
4608 } else {
4609 std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, DL);
4610 }
4611
4612 SDValue IndexHi, IndexLo;
4613 if (getTypeAction(Ops.Index.getValueType()) ==
4615 GetSplitVector(Ops.Index, IndexLo, IndexHi);
4616 else
4617 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL);
4618
4619 SDValue Lo;
4620 MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
4621 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4622 N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
4623 Alignment, N->getAAInfo(), N->getRanges());
4624
4625 if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
4626 SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
4627 Lo =
4628 DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
4629 MSC->getIndexType(), MSC->isTruncatingStore());
4630
4631 // The order of the Scatter operation after split is well defined. The "Hi"
4632 // part comes after the "Lo". So these two operations should be chained one
4633 // after another.
4634 SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Ops.Scale};
4635 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi,
4636 MMO, MSC->getIndexType(),
4637 MSC->isTruncatingStore());
4638 }
4639 auto *VPSC = cast<VPScatterSDNode>(N);
4640 SDValue EVLLo, EVLHi;
4641 std::tie(EVLLo, EVLHi) =
4642 DAG.SplitEVL(VPSC->getVectorLength(), Ops.Data.getValueType(), DL);
4643
4644 SDValue OpsLo[] = {Ch, DataLo, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
4645 Lo = DAG.getScatterVP(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
4646 VPSC->getIndexType());
4647
4648 // The order of the Scatter operation after split is well defined. The "Hi"
4649 // part comes after the "Lo". So these two operations should be chained one
4650 // after another.
4651 SDValue OpsHi[] = {Lo, DataHi, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
4652 return DAG.getScatterVP(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi, MMO,
4653 VPSC->getIndexType());
4654}
4655
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
  // Split a regular or truncating vector store into stores of the two
  // half-width halves, joined by a TokenFactor (the halves are independent).
  assert(N->isUnindexed() && "Indexed store of vector?");
  assert(OpNo == 1 && "Can only split the stored value");
  SDLoc DL(N);

  bool isTruncating = N->isTruncatingStore();
  SDValue Ch = N->getChain();
  SDValue Ptr = N->getBasePtr();
  EVT MemoryVT = N->getMemoryVT();
  Align Alignment = N->getBaseAlign();
  MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
  AAMDNodes AAInfo = N->getAAInfo();
  SDValue Lo, Hi;
  GetSplitVector(N->getOperand(1), Lo, Hi);

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  // Scalarize if the split halves are not byte-sized.
  if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized())
    return TLI.scalarizeVectorStore(N, DAG);

  if (isTruncating)
    Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT,
                           Alignment, MMOFlags, AAInfo);
  else
    Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
                      AAInfo);

  // Advance Ptr past the low half and compute the high half's pointer info.
  MachinePointerInfo MPI;
  IncrementPointer(N, LoMemVT, MPI, Ptr);

  if (isTruncating)
    Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, MPI,
                           HiMemVT, Alignment, MMOFlags, AAInfo);
  else
    Hi = DAG.getStore(Ch, DL, Hi, Ptr, MPI, Alignment, MMOFlags, AAInfo);

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
4696
4697SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
4698 SDLoc DL(N);
4699
4700 // The input operands all must have the same type, and we know the result
4701 // type is valid. Convert this to a buildvector which extracts all the
4702 // input elements.
4703 // TODO: If the input elements are power-two vectors, we could convert this to
4704 // a new CONCAT_VECTORS node with elements that are half-wide.
4706 EVT EltVT = N->getValueType(0).getVectorElementType();
4707 for (const SDValue &Op : N->op_values()) {
4708 for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
4709 i != e; ++i) {
4710 Elts.push_back(DAG.getExtractVectorElt(DL, EltVT, Op, i));
4711 }
4712 }
4713
4714 return DAG.getBuildVector(N->getValueType(0), DL, Elts);
4715}
4716
SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
  // The result type is legal, but the input type is illegal. If splitting
  // ends up with the result type of each half still being legal, just
  // do that. If, however, that would result in an illegal result type,
  // we can try to get more clever with power-two vectors. Specifically,
  // split the input type, but also widen the result element size, then
  // concatenate the halves and truncate again. For example, consider a target
  // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit
  // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do:
  //   %inlo = v4i32 extract_subvector %in, 0
  //   %inhi = v4i32 extract_subvector %in, 4
  //   %lo16 = v4i16 trunc v4i32 %inlo
  //   %hi16 = v4i16 trunc v4i32 %inhi
  //   %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
  //   %res = v8i8 trunc v8i16 %in16
  //
  // Without this transform, the original truncate would end up being
  // scalarized, which is pretty much always a last resort.
  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
  SDValue InVec = N->getOperand(OpNo);
  EVT InVT = InVec->getValueType(0);
  EVT OutVT = N->getValueType(0);
  ElementCount NumElements = OutVT.getVectorElementCount();
  bool IsFloat = OutVT.isFloatingPoint();

  unsigned InElementSize = InVT.getScalarSizeInBits();
  unsigned OutElementSize = OutVT.getScalarSizeInBits();

  // Determine the split output VT. If it's legal we can just split directly.
  EVT LoOutVT, HiOutVT;
  std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT);
  assert(LoOutVT == HiOutVT && "Unequal split?");

  // If the input elements are only 1/2 the width of the result elements,
  // just use the normal splitting. Our trick only works if there's room
  // to split more than once.  (Non-power-of-2 FP element sizes can't be
  // halved, so those fall back to normal splitting too.)
  if (isTypeLegal(LoOutVT) || InElementSize <= OutElementSize * 2 ||
      (IsFloat && !isPowerOf2_32(InElementSize)))
    return SplitVecOp_UnaryOp(N);
  SDLoc DL(N);

  // Don't touch if this will be scalarized.
  EVT FinalVT = InVT;
  while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
    FinalVT = FinalVT.getHalfNumVectorElementsVT(*DAG.getContext());

  if (getTypeAction(FinalVT) == TargetLowering::TypeScalarizeVector)
    return SplitVecOp_UnaryOp(N);

  // Get the split input vector.
  SDValue InLoVec, InHiVec;
  GetSplitVector(InVec, InLoVec, InHiVec);

  // Truncate them to 1/2 the element size.
  //
  // This assumes the number of elements is a power of two; any vector that
  // isn't should be widened, not split.
  EVT HalfElementVT = IsFloat ?
    EVT::getFloatingPointVT(InElementSize/2) :
    EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
                                NumElements.divideCoefficientBy(2));

  SDValue HalfLo;
  SDValue HalfHi;
  SDValue Chain;
  if (N->isStrictFPOpcode()) {
    HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
                         {N->getOperand(0), InLoVec});
    HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
                         {N->getOperand(0), InHiVec});
    // Legalize the chain result - switch anything that used the old chain to
    // use the new one.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1),
                        HalfHi.getValue(1));
  } else {
    HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
    HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
  }

  // Concatenate them to get the full intermediate truncation result.
  EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
  SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
                                 HalfHi);
  // Now finish up by truncating all the way down to the original result
  // type. This should normally be something that ends up being legal directly,
  // but in theory if a target has very wide vectors and an annoyingly
  // restricted set of legal types, this split can chain to build things up.

  if (N->isStrictFPOpcode()) {
    SDValue Res = DAG.getNode(
        ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other},
        {Chain, InterVec,
         DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))});
    // Relink the chain
    ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
    return Res;
  }

  return IsFloat
             ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
                           DAG.getTargetConstant(
                               0, DL, TLI.getPointerTy(DAG.getDataLayout())))
             : DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
}
4822
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
  // Split the vector operands of a SETCC / STRICT_FSETCC(S) / VP_SETCC whose
  // result type is legal: compare the two halves, concatenate, then
  // extend/truncate the concatenation to the expected result type.
  unsigned Opc = N->getOpcode();
  bool isStrict = Opc == ISD::STRICT_FSETCC || Opc == ISD::STRICT_FSETCCS;
  assert(N->getValueType(0).isVector() &&
         N->getOperand(isStrict ? 1 : 0).getValueType().isVector() &&
         "Operand types must be vectors");
  // The result has a legal vector type, but the input needs splitting.
  SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
  SDLoc DL(N);
  // Strict nodes carry their chain as operand 0, shifting the compare
  // operands by one.
  GetSplitVector(N->getOperand(isStrict ? 1 : 0), Lo0, Hi0);
  GetSplitVector(N->getOperand(isStrict ? 2 : 1), Lo1, Hi1);

  EVT VT = N->getValueType(0);
  // Result type for each half, as chosen by the target for setcc.
  EVT PartResVT = getSetCCResultType(Lo0.getValueType());

  if (Opc == ISD::SETCC) {
    LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
    HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
  } else if (isStrict) {
    LoRes = DAG.getNode(Opc, DL, DAG.getVTList(PartResVT, N->getValueType(1)),
                        N->getOperand(0), Lo0, Lo1, N->getOperand(3));
    HiRes = DAG.getNode(Opc, DL, DAG.getVTList(PartResVT, N->getValueType(1)),
                        N->getOperand(0), Hi0, Hi1, N->getOperand(3));
    // Merge the two chain results so users of the old chain see both halves.
    SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                   LoRes.getValue(1), HiRes.getValue(1));
    ReplaceValueWith(SDValue(N, 1), NewChain);
  } else {
    assert(Opc == ISD::VP_SETCC && "Expected VP_SETCC opcode");
    // VP nodes also need their mask and explicit vector length split.
    SDValue MaskLo, MaskHi, EVLLo, EVLHi;
    std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
    std::tie(EVLLo, EVLHi) =
        DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL);
    LoRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Lo0, Lo1,
                        N->getOperand(2), MaskLo, EVLLo);
    HiRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Hi0, Hi1,
                        N->getOperand(2), MaskHi, EVLHi);
  }

  EVT ConcatVT = PartResVT.getDoubleNumVectorElementsVT(*DAG.getContext());
  SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, LoRes, HiRes);
  if (VT == ConcatVT)
    return Con;

  // Adjust to the requested result type, extending according to the target's
  // boolean-contents convention for the operand type.
  EVT OpVT = N->getOperand(0).getValueType();
  ISD::NodeType ExtendCode =
      TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
  return DAG.getExtOrTrunc(Con, DL, VT, ExtendCode);
}
4871
4872
SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
  // The result has a legal vector type, but the input needs splitting.
  // Round each half separately, then concatenate the half-width results.
  EVT ResVT = N->getValueType(0);
  SDValue Lo, Hi;
  SDLoc DL(N);
  // Strict nodes carry their chain as operand 0.
  GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
  EVT InVT = Lo.getValueType();

  // Each half produces the result element type at half the element count.
  EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
                               InVT.getVectorElementCount());

  if (N->isStrictFPOpcode()) {
    Lo = DAG.getNode(N->getOpcode(), DL, {OutVT, MVT::Other},
                     {N->getOperand(0), Lo, N->getOperand(2)});
    Hi = DAG.getNode(N->getOpcode(), DL, {OutVT, MVT::Other},
                     {N->getOperand(0), Hi, N->getOperand(2)});
    // Legalize the chain result - switch anything that used the old chain to
    // use the new one.
    SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                   Lo.getValue(1), Hi.getValue(1));
    ReplaceValueWith(SDValue(N, 1), NewChain);
  } else if (N->getOpcode() == ISD::VP_FP_ROUND) {
    // VP variant: also split the mask and explicit vector length.
    SDValue MaskLo, MaskHi, EVLLo, EVLHi;
    std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
    std::tie(EVLLo, EVLHi) =
        DAG.SplitEVL(N->getOperand(2), N->getValueType(0), DL);
    Lo = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Lo, MaskLo, EVLLo);
    Hi = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Hi, MaskHi, EVLHi);
  } else {
    // Plain FP_ROUND: operand 1 is the truncation-is-exact flag.
    Lo = DAG.getNode(N->getOpcode(), DL, OutVT, Lo, N->getOperand(1));
    Hi = DAG.getNode(N->getOpcode(), DL, OutVT, Hi, N->getOperand(1));
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
4908
4909// Split a vector type in an FP binary operation where the second operand has a
4910// different type from the first.
4911//
4912// The result (and the first input) has a legal vector type, but the second
4913// input needs splitting.
4914SDValue DAGTypeLegalizer::SplitVecOp_FPOpDifferentTypes(SDNode *N) {
4915 SDLoc DL(N);
4916
4917 EVT LHSLoVT, LHSHiVT;
4918 std::tie(LHSLoVT, LHSHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
4919
4920 if (!isTypeLegal(LHSLoVT) || !isTypeLegal(LHSHiVT))
4921 return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
4922
4923 SDValue LHSLo, LHSHi;
4924 std::tie(LHSLo, LHSHi) =
4925 DAG.SplitVector(N->getOperand(0), DL, LHSLoVT, LHSHiVT);
4926
4927 SDValue RHSLo, RHSHi;
4928 std::tie(RHSLo, RHSHi) = DAG.SplitVector(N->getOperand(1), DL);
4929
4930 SDValue Lo = DAG.getNode(N->getOpcode(), DL, LHSLoVT, LHSLo, RHSLo);
4931 SDValue Hi = DAG.getNode(N->getOpcode(), DL, LHSHiVT, LHSHi, RHSHi);
4932
4933 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi);
4934}
4935
4936SDValue DAGTypeLegalizer::SplitVecOp_CMP(SDNode *N) {
4937 LLVMContext &Ctxt = *DAG.getContext();
4938 SDLoc dl(N);
4939
4940 SDValue LHSLo, LHSHi, RHSLo, RHSHi;
4941 GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
4942 GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
4943
4944 EVT ResVT = N->getValueType(0);
4945 ElementCount SplitOpEC = LHSLo.getValueType().getVectorElementCount();
4946 EVT NewResVT =
4947 EVT::getVectorVT(Ctxt, ResVT.getVectorElementType(), SplitOpEC);
4948
4949 SDValue Lo = DAG.getNode(N->getOpcode(), dl, NewResVT, LHSLo, RHSLo);
4950 SDValue Hi = DAG.getNode(N->getOpcode(), dl, NewResVT, LHSHi, RHSHi);
4951
4952 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
4953}
4954
4955SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
4956 EVT ResVT = N->getValueType(0);
4957 SDValue Lo, Hi;
4958 SDLoc dl(N);
4959 GetSplitVector(N->getOperand(0), Lo, Hi);
4960 EVT InVT = Lo.getValueType();
4961
4962 EVT NewResVT =
4963 EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
4964 InVT.getVectorElementCount());
4965
4966 Lo = DAG.getNode(N->getOpcode(), dl, NewResVT, Lo, N->getOperand(1));
4967 Hi = DAG.getNode(N->getOpcode(), dl, NewResVT, Hi, N->getOperand(1));
4968
4969 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
4970}
4971
SDValue DAGTypeLegalizer::SplitVecOp_CttzElts(SDNode *N) {
  // Split the vector operand of a CTTZ_ELTS-style count by combining the
  // counts of the two halves.
  SDLoc DL(N);
  EVT ResVT = N->getValueType(0);

  SDValue Lo, Hi;
  SDValue VecOp = N->getOperand(0);
  GetSplitVector(VecOp, Lo, Hi);

  // if CTTZ_ELTS(Lo) != VL => CTTZ_ELTS(Lo).
  // else => VL + (CTTZ_ELTS(Hi) or CTTZ_ELTS_ZERO_POISON(Hi)).
  // The low half always uses plain CTTZ_ELTS (not N's opcode) because its
  // result is compared against VL below and so must be well-defined even
  // when Lo has no set element.
  SDValue ResLo = DAG.getNode(ISD::CTTZ_ELTS, DL, ResVT, Lo);
  SDValue VL =
      DAG.getElementCount(DL, ResVT, Lo.getValueType().getVectorElementCount());
  SDValue ResLoNotVL =
      DAG.getSetCC(DL, getSetCCResultType(ResVT), ResLo, VL, ISD::SETNE);
  SDValue ResHi = DAG.getNode(N->getOpcode(), DL, ResVT, Hi);
  return DAG.getSelect(DL, ResVT, ResLoNotVL, ResLo,
                       DAG.getNode(ISD::ADD, DL, ResVT, VL, ResHi));
}
4991
SDValue DAGTypeLegalizer::SplitVecOp_VP_CttzElements(SDNode *N) {
  // VP variant of SplitVecOp_CttzElts: also splits the mask (operand 1) and
  // the explicit vector length (operand 2) across the two halves.
  SDLoc DL(N);
  EVT ResVT = N->getValueType(0);

  SDValue Lo, Hi;
  SDValue VecOp = N->getOperand(0);
  GetSplitVector(VecOp, Lo, Hi);

  auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1));
  auto [EVLLo, EVLHi] =
      DAG.SplitEVL(N->getOperand(2), VecOp.getValueType(), DL);
  // EVLLo in the result type, for the comparison and the addition below.
  SDValue VLo = DAG.getZExtOrTrunc(EVLLo, DL, ResVT);

  // if VP_CTTZ_ELTS(Lo) != EVLLo => VP_CTTZ_ELTS(Lo).
  // else => EVLLo + (VP_CTTZ_ELTS(Hi) or VP_CTTZ_ELTS_ZERO_POISON(Hi)).
  // The low half always uses plain VP_CTTZ_ELTS (not N's opcode) because its
  // result is compared against EVLLo and so must be well-defined.
  SDValue ResLo = DAG.getNode(ISD::VP_CTTZ_ELTS, DL, ResVT, Lo, MaskLo, EVLLo);
  SDValue ResLoNotEVL =
      DAG.getSetCC(DL, getSetCCResultType(ResVT), ResLo, VLo, ISD::SETNE);
  SDValue ResHi = DAG.getNode(N->getOpcode(), DL, ResVT, Hi, MaskHi, EVLHi);
  return DAG.getSelect(DL, ResVT, ResLoNotEVL, ResLo,
                       DAG.getNode(ISD::ADD, DL, ResVT, VLo, ResHi));
}
5014
SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_HISTOGRAM(SDNode *N) {
  // Split a masked histogram's index and mask vectors; the scalar operands
  // (Inc, Ptr, Scale, IntID) are shared by both halves.
  MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
  SDLoc DL(HG);
  SDValue Inc = HG->getInc();
  SDValue Ptr = HG->getBasePtr();
  SDValue Scale = HG->getScale();
  SDValue IntID = HG->getIntID();
  EVT MemVT = HG->getMemoryVT();
  MachineMemOperand *MMO = HG->getMemOperand();
  ISD::MemIndexType IndexType = HG->getIndexType();

  SDValue IndexLo, IndexHi, MaskLo, MaskHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(HG->getIndex(), DL);
  std::tie(MaskLo, MaskHi) = DAG.SplitVector(HG->getMask(), DL);
  SDValue OpsLo[] = {HG->getChain(), Inc, MaskLo, Ptr, IndexLo, Scale, IntID};
  SDValue Lo = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL,
                                      OpsLo, MMO, IndexType);
  // The high half is chained on the low half's result (Lo is its chain
  // operand), fixing the order of the two updates.
  SDValue OpsHi[] = {Lo, Inc, MaskHi, Ptr, IndexHi, Scale, IntID};
  return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, OpsHi,
                                MMO, IndexType);
}
5036
5037SDValue DAGTypeLegalizer::SplitVecOp_PARTIAL_REDUCE_MLA(SDNode *N) {
5038 SDValue Acc = N->getOperand(0);
5039 assert(getTypeAction(Acc.getValueType()) != TargetLowering::TypeSplitVector &&
5040 "Accumulator should already be a legal type, and shouldn't need "
5041 "further splitting");
5042
5043 SDLoc DL(N);
5044 SDValue Input1Lo, Input1Hi, Input2Lo, Input2Hi;
5045 GetSplitVector(N->getOperand(1), Input1Lo, Input1Hi);
5046 GetSplitVector(N->getOperand(2), Input2Lo, Input2Hi);
5047 unsigned Opcode = N->getOpcode();
5048 EVT ResultVT = Acc.getValueType();
5049
5050 SDValue Lo = DAG.getNode(Opcode, DL, ResultVT, Acc, Input1Lo, Input2Lo);
5051 return DAG.getNode(Opcode, DL, ResultVT, Lo, Input1Hi, Input2Hi);
5052}
5053
5054//===----------------------------------------------------------------------===//
5055// Result Vector Widening
5056//===----------------------------------------------------------------------===//
5057
5058void DAGTypeLegalizer::ReplaceOtherWidenResults(SDNode *N, SDNode *WidenNode,
5059 unsigned WidenResNo) {
5060 unsigned NumResults = N->getNumValues();
5061 for (unsigned ResNo = 0; ResNo < NumResults; ResNo++) {
5062 if (ResNo == WidenResNo)
5063 continue;
5064 EVT ResVT = N->getValueType(ResNo);
5065 if (getTypeAction(ResVT) == TargetLowering::TypeWidenVector) {
5066 SetWidenedVector(SDValue(N, ResNo), SDValue(WidenNode, ResNo));
5067 } else {
5068 SDLoc DL(N);
5069 SDValue ResVal =
5070 DAG.getExtractSubvector(DL, ResVT, SDValue(WidenNode, ResNo), 0);
5071 ReplaceValueWith(SDValue(N, ResNo), ResVal);
5072 }
5073 }
5074}
5075
5076void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
5077 LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG));
5078
5079 // See if the target wants to custom widen this node.
5080 if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
5081 return;
5082
5083 SDValue Res = SDValue();
5084
5085 auto unrollExpandedOp = [&]() {
5086 // We're going to widen this vector op to a legal type by padding with undef
5087 // elements. If the wide vector op is eventually going to be expanded to
5088 // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
5089 // libcalls on the undef elements.
5090 EVT VT = N->getValueType(0);
5091 EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
5092 if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) &&
5093 TLI.isOperationExpandOrLibCall(N->getOpcode(), VT.getScalarType())) {
5094 Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
5095 if (N->getNumValues() > 1)
5096 ReplaceOtherWidenResults(N, Res.getNode(), ResNo);
5097 return true;
5098 }
5099 return false;
5100 };
5101
5102 switch (N->getOpcode()) {
5103 default:
5104#ifndef NDEBUG
5105 dbgs() << "WidenVectorResult #" << ResNo << ": ";
5106 N->dump(&DAG);
5107 dbgs() << "\n";
5108#endif
5109 report_fatal_error("Do not know how to widen the result of this operator!");
5110
5113 Res = WidenVecRes_LOOP_DEPENDENCE_MASK(N);
5114 break;
5115 case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
5116 case ISD::ADDRSPACECAST:
5117 Res = WidenVecRes_ADDRSPACECAST(N);
5118 break;
5119 case ISD::AssertZext: Res = WidenVecRes_AssertZext(N); break;
5120 case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
5121 case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
5122 case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
5124 Res = WidenVecRes_INSERT_SUBVECTOR(N);
5125 break;
5126 case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
5127 case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
5128 case ISD::ATOMIC_LOAD:
5129 Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
5130 break;
5131 case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
5132 case ISD::STEP_VECTOR:
5133 case ISD::SPLAT_VECTOR:
5135 Res = WidenVecRes_ScalarOp(N);
5136 break;
5137 case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
5138 case ISD::VSELECT:
5139 case ISD::SELECT:
5140 case ISD::VP_SELECT:
5141 case ISD::VP_MERGE:
5142 Res = WidenVecRes_Select(N);
5143 break;
5144 case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
5145 case ISD::VP_SETCC:
5146 case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
5147 case ISD::POISON:
5148 case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
5150 Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
5151 break;
5152 case ISD::VP_LOAD:
5153 Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N));
5154 break;
5155 case ISD::VP_LOAD_FF:
5156 Res = WidenVecRes_VP_LOAD_FF(cast<VPLoadFFSDNode>(N));
5157 break;
5158 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
5159 Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N));
5160 break;
5162 Res = WidenVecRes_VECTOR_COMPRESS(N);
5163 break;
5164 case ISD::MLOAD:
5165 Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
5166 break;
5167 case ISD::MGATHER:
5168 Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
5169 break;
5170 case ISD::VP_GATHER:
5171 Res = WidenVecRes_VP_GATHER(cast<VPGatherSDNode>(N));
5172 break;
5174 Res = WidenVecRes_VECTOR_REVERSE(N);
5175 break;
5177 Res = WidenVecRes_GET_ACTIVE_LANE_MASK(N);
5178 break;
5179
5180 case ISD::ADD: case ISD::VP_ADD:
5181 case ISD::AND: case ISD::VP_AND:
5182 case ISD::MUL: case ISD::VP_MUL:
5183 case ISD::MULHS:
5184 case ISD::MULHU:
5185 case ISD::ABDS:
5186 case ISD::ABDU:
5187 case ISD::OR: case ISD::VP_OR:
5188 case ISD::SUB: case ISD::VP_SUB:
5189 case ISD::XOR: case ISD::VP_XOR:
5190 case ISD::SHL: case ISD::VP_SHL:
5191 case ISD::SRA: case ISD::VP_SRA:
5192 case ISD::SRL: case ISD::VP_SRL:
5193 case ISD::CLMUL:
5194 case ISD::CLMULR:
5195 case ISD::CLMULH:
5196 case ISD::FMINNUM:
5197 case ISD::FMINNUM_IEEE:
5198 case ISD::VP_FMINNUM:
5199 case ISD::FMAXNUM:
5200 case ISD::FMAXNUM_IEEE:
5201 case ISD::VP_FMAXNUM:
5202 case ISD::FMINIMUM:
5203 case ISD::VP_FMINIMUM:
5204 case ISD::FMAXIMUM:
5205 case ISD::VP_FMAXIMUM:
5206 case ISD::FMINIMUMNUM:
5207 case ISD::FMAXIMUMNUM:
5208 case ISD::SMIN: case ISD::VP_SMIN:
5209 case ISD::SMAX: case ISD::VP_SMAX:
5210 case ISD::UMIN: case ISD::VP_UMIN:
5211 case ISD::UMAX: case ISD::VP_UMAX:
5212 case ISD::UADDSAT: case ISD::VP_UADDSAT:
5213 case ISD::SADDSAT: case ISD::VP_SADDSAT:
5214 case ISD::USUBSAT: case ISD::VP_USUBSAT:
5215 case ISD::SSUBSAT: case ISD::VP_SSUBSAT:
5216 case ISD::SSHLSAT:
5217 case ISD::USHLSAT:
5218 case ISD::ROTL:
5219 case ISD::ROTR:
5220 case ISD::AVGFLOORS:
5221 case ISD::AVGFLOORU:
5222 case ISD::AVGCEILS:
5223 case ISD::AVGCEILU:
5224 // Vector-predicated binary op widening. Note that -- unlike the
5225 // unpredicated versions -- we don't have to worry about trapping on
5226 // operations like UDIV, FADD, etc., as we pass on the original vector
5227 // length parameter. This means the widened elements containing garbage
5228 // aren't active.
5229 case ISD::VP_SDIV:
5230 case ISD::VP_UDIV:
5231 case ISD::VP_SREM:
5232 case ISD::VP_UREM:
5233 case ISD::VP_FADD:
5234 case ISD::VP_FSUB:
5235 case ISD::VP_FMUL:
5236 case ISD::VP_FDIV:
5237 case ISD::VP_FREM:
5238 case ISD::VP_FCOPYSIGN:
5239 Res = WidenVecRes_Binary(N);
5240 break;
5241
5242 case ISD::MASKED_UDIV:
5243 case ISD::MASKED_SDIV:
5244 case ISD::MASKED_UREM:
5245 case ISD::MASKED_SREM:
5246 Res = WidenVecRes_MaskedBinary(N);
5247 break;
5248
5249 case ISD::SCMP:
5250 case ISD::UCMP:
5251 Res = WidenVecRes_CMP(N);
5252 break;
5253
5254 case ISD::FPOW:
5255 case ISD::FATAN2:
5256 case ISD::FREM:
5257 if (unrollExpandedOp())
5258 break;
5259 // If the target has custom/legal support for the scalar FP intrinsic ops
5260 // (they are probably not destined to become libcalls), then widen those
5261 // like any other binary ops.
5262 [[fallthrough]];
5263
5264 case ISD::FADD:
5265 case ISD::FMUL:
5266 case ISD::FSUB:
5267 case ISD::FDIV:
5268 case ISD::SDIV:
5269 case ISD::UDIV:
5270 case ISD::SREM:
5271 case ISD::UREM:
5272 Res = WidenVecRes_BinaryCanTrap(N);
5273 break;
5274
5275 case ISD::SMULFIX:
5276 case ISD::SMULFIXSAT:
5277 case ISD::UMULFIX:
5278 case ISD::UMULFIXSAT:
5279 // These are binary operations, but with an extra operand that shouldn't
5280 // be widened (the scale).
5281 Res = WidenVecRes_BinaryWithExtraScalarOp(N);
5282 break;
5283
5284#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
5285 case ISD::STRICT_##DAGN:
5286#include "llvm/IR/ConstrainedOps.def"
5287 Res = WidenVecRes_StrictFP(N);
5288 break;
5289
5290 case ISD::UADDO:
5291 case ISD::SADDO:
5292 case ISD::USUBO:
5293 case ISD::SSUBO:
5294 case ISD::UMULO:
5295 case ISD::SMULO:
5296 Res = WidenVecRes_OverflowOp(N, ResNo);
5297 break;
5298
5299 case ISD::FCOPYSIGN:
5300 Res = WidenVecRes_FCOPYSIGN(N);
5301 break;
5302
5303 case ISD::IS_FPCLASS:
5304 case ISD::FPTRUNC_ROUND:
5305 Res = WidenVecRes_UnarySameEltsWithScalarArg(N);
5306 break;
5307
5308 case ISD::FLDEXP:
5309 case ISD::FPOWI:
5310 if (!unrollExpandedOp())
5311 Res = WidenVecRes_ExpOp(N);
5312 break;
5313
5317 Res = WidenVecRes_EXTEND_VECTOR_INREG(N);
5318 break;
5319
5320 case ISD::ANY_EXTEND:
5321 case ISD::FP_EXTEND:
5322 case ISD::VP_FP_EXTEND:
5323 case ISD::FP_ROUND:
5324 case ISD::VP_FP_ROUND:
5325 case ISD::FP_TO_SINT:
5326 case ISD::VP_FP_TO_SINT:
5327 case ISD::FP_TO_UINT:
5328 case ISD::VP_FP_TO_UINT:
5329 case ISD::SIGN_EXTEND:
5330 case ISD::VP_SIGN_EXTEND:
5331 case ISD::SINT_TO_FP:
5332 case ISD::VP_SINT_TO_FP:
5333 case ISD::VP_TRUNCATE:
5334 case ISD::TRUNCATE:
5335 case ISD::UINT_TO_FP:
5336 case ISD::VP_UINT_TO_FP:
5337 case ISD::ZERO_EXTEND:
5338 case ISD::VP_ZERO_EXTEND:
5340 Res = WidenVecRes_Convert(N);
5341 break;
5342
5345 Res = WidenVecRes_FP_TO_XINT_SAT(N);
5346 break;
5347
5348 case ISD::LRINT:
5349 case ISD::LLRINT:
5350 case ISD::VP_LRINT:
5351 case ISD::VP_LLRINT:
5352 case ISD::LROUND:
5353 case ISD::LLROUND:
5354 Res = WidenVecRes_XROUND(N);
5355 break;
5356
5357 case ISD::FACOS:
5358 case ISD::FASIN:
5359 case ISD::FATAN:
5360 case ISD::FCEIL:
5361 case ISD::FCOS:
5362 case ISD::FCOSH:
5363 case ISD::FEXP:
5364 case ISD::FEXP2:
5365 case ISD::FEXP10:
5366 case ISD::FFLOOR:
5367 case ISD::FLOG:
5368 case ISD::FLOG10:
5369 case ISD::FLOG2:
5370 case ISD::FNEARBYINT:
5371 case ISD::FRINT:
5372 case ISD::FROUND:
5373 case ISD::FROUNDEVEN:
5374 case ISD::FSIN:
5375 case ISD::FSINH:
5376 case ISD::FSQRT:
5377 case ISD::FTAN:
5378 case ISD::FTANH:
5379 case ISD::FTRUNC:
5380 if (unrollExpandedOp())
5381 break;
5382 // If the target has custom/legal support for the scalar FP intrinsic ops
5383 // (they are probably not destined to become libcalls), then widen those
5384 // like any other unary ops.
5385 [[fallthrough]];
5386
5387 case ISD::ABS:
5389 case ISD::VP_ABS:
5390 case ISD::BITREVERSE:
5391 case ISD::VP_BITREVERSE:
5392 case ISD::BSWAP:
5393 case ISD::VP_BSWAP:
5394 case ISD::CTLZ:
5395 case ISD::VP_CTLZ:
5397 case ISD::VP_CTLZ_ZERO_POISON:
5398 case ISD::CTPOP:
5399 case ISD::VP_CTPOP:
5400 case ISD::CTTZ:
5401 case ISD::VP_CTTZ:
5403 case ISD::VP_CTTZ_ZERO_POISON:
5404 case ISD::FNEG: case ISD::VP_FNEG:
5405 case ISD::FABS: case ISD::VP_FABS:
5406 case ISD::VP_SQRT:
5407 case ISD::VP_FCEIL:
5408 case ISD::VP_FFLOOR:
5409 case ISD::VP_FRINT:
5410 case ISD::VP_FNEARBYINT:
5411 case ISD::VP_FROUND:
5412 case ISD::VP_FROUNDEVEN:
5413 case ISD::VP_FROUNDTOZERO:
5414 case ISD::FREEZE:
5415 case ISD::ARITH_FENCE:
5416 case ISD::FCANONICALIZE:
5418 Res = WidenVecRes_Unary(N);
5419 break;
5420 case ISD::FMA: case ISD::VP_FMA:
5421 case ISD::FSHL:
5422 case ISD::VP_FSHL:
5423 case ISD::FSHR:
5424 case ISD::VP_FSHR:
5425 Res = WidenVecRes_Ternary(N);
5426 break;
5427 case ISD::FMODF:
5428 case ISD::FFREXP:
5429 case ISD::FSINCOS:
5430 case ISD::FSINCOSPI: {
5431 if (!unrollExpandedOp())
5432 Res = WidenVecRes_UnaryOpWithTwoResults(N, ResNo);
5433 break;
5434 }
5435 }
5436
5437 // If Res is null, the sub-method took care of registering the result.
5438 if (Res.getNode())
5439 SetWidenedVector(SDValue(N, ResNo), Res);
5440}
5441
5442SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
5443 // Ternary op widening.
5444 SDLoc dl(N);
5445 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
5446 SDValue InOp1 = GetWidenedVector(N->getOperand(0));
5447 SDValue InOp2 = GetWidenedVector(N->getOperand(1));
5448 SDValue InOp3 = GetWidenedVector(N->getOperand(2));
5449 if (N->getNumOperands() == 3)
5450 return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
5451
5452 assert(N->getNumOperands() == 5 && "Unexpected number of operands!");
5453 assert(N->isVPOpcode() && "Expected VP opcode");
5454
5455 SDValue Mask =
5456 GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount());
5457 return DAG.getNode(N->getOpcode(), dl, WidenVT,
5458 {InOp1, InOp2, InOp3, Mask, N->getOperand(4)});
5459}
5460
5461SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
5462 // Binary op widening.
5463 SDLoc dl(N);
5464 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
5465 SDValue InOp1 = GetWidenedVector(N->getOperand(0));
5466 SDValue InOp2 = GetWidenedVector(N->getOperand(1));
5467 if (N->getNumOperands() == 2)
5468 return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2,
5469 N->getFlags());
5470
5471 assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
5472 assert(N->isVPOpcode() && "Expected VP opcode");
5473
5474 SDValue Mask =
5475 GetWidenedMask(N->getOperand(2), WidenVT.getVectorElementCount());
5476 return DAG.getNode(N->getOpcode(), dl, WidenVT,
5477 {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags());
5478}
5479
5480SDValue DAGTypeLegalizer::WidenVecRes_MaskedBinary(SDNode *N) {
5481 SDLoc dl(N);
5482 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
5483 SDValue InOp1 = GetWidenedVector(N->getOperand(0));
5484 SDValue InOp2 = GetWidenedVector(N->getOperand(1));
5485 SDValue Mask = N->getOperand(2);
5486 EVT WideMaskVT = WidenVT.changeVectorElementType(
5487 *DAG.getContext(), Mask.getValueType().getVectorElementType());
5488 Mask = ModifyToType(Mask, WideMaskVT, /*FillWithZeros=*/true);
5489 return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Mask,
5490 N->getFlags());
5491}
5492
5493SDValue DAGTypeLegalizer::WidenVecRes_CMP(SDNode *N) {
5494 LLVMContext &Ctxt = *DAG.getContext();
5495 SDLoc dl(N);
5496
5497 SDValue LHS = N->getOperand(0);
5498 SDValue RHS = N->getOperand(1);
5499 EVT OpVT = LHS.getValueType();
5500 if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector) {
5501 LHS = GetWidenedVector(LHS);
5502 RHS = GetWidenedVector(RHS);
5503 OpVT = LHS.getValueType();
5504 }
5505
5506 EVT WidenResVT = TLI.getTypeToTransformTo(Ctxt, N->getValueType(0));
5507 ElementCount WidenResEC = WidenResVT.getVectorElementCount();
5508 if (WidenResEC == OpVT.getVectorElementCount()) {
5509 return DAG.getNode(N->getOpcode(), dl, WidenResVT, LHS, RHS);
5510 }
5511
5512 return DAG.UnrollVectorOp(N, WidenResVT.getVectorNumElements());
5513}
5514
5515SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {
5516 // Binary op widening, but with an extra operand that shouldn't be widened.
5517 SDLoc dl(N);
5518 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
5519 SDValue InOp1 = GetWidenedVector(N->getOperand(0));
5520 SDValue InOp2 = GetWidenedVector(N->getOperand(1));
5521 SDValue InOp3 = N->getOperand(2);
5522 return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3,
5523 N->getFlags());
5524}
5525
5526// Given a vector of operations that have been broken up to widen, see
5527// if we can collect them together into the next widest legal VT. This
5528// implementation is trap-safe.
5530 SmallVectorImpl<SDValue> &ConcatOps,
5531 unsigned ConcatEnd, EVT VT, EVT MaxVT,
5532 EVT WidenVT) {
5533 // Check to see if we have a single operation with the widen type.
5534 if (ConcatEnd == 1) {
5535 VT = ConcatOps[0].getValueType();
5536 if (VT == WidenVT)
5537 return ConcatOps[0];
5538 }
5539
5540 SDLoc dl(ConcatOps[0]);
5541 EVT WidenEltVT = WidenVT.getVectorElementType();
5542
5543 // while (Some element of ConcatOps is not of type MaxVT) {
5544 // From the end of ConcatOps, collect elements of the same type and put
5545 // them into an op of the next larger supported type
5546 // }
5547 while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
5548 int Idx = ConcatEnd - 1;
5549 VT = ConcatOps[Idx--].getValueType();
5550 while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
5551 Idx--;
5552
5553 int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
5554 EVT NextVT;
5555 do {
5556 NextSize *= 2;
5557 NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
5558 } while (!TLI.isTypeLegal(NextVT));
5559
5560 if (!VT.isVector()) {
5561 // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
5562 SDValue VecOp = DAG.getPOISON(NextVT);
5563 unsigned NumToInsert = ConcatEnd - Idx - 1;
5564 for (unsigned i = 0, OpIdx = Idx + 1; i < NumToInsert; i++, OpIdx++)
5565 VecOp = DAG.getInsertVectorElt(dl, VecOp, ConcatOps[OpIdx], i);
5566 ConcatOps[Idx+1] = VecOp;
5567 ConcatEnd = Idx + 2;
5568 } else {
5569 // Vector type, create a CONCAT_VECTORS of type NextVT
5570 SDValue undefVec = DAG.getPOISON(VT);
5571 unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
5572 SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
5573 unsigned RealVals = ConcatEnd - Idx - 1;
5574 unsigned SubConcatEnd = 0;
5575 unsigned SubConcatIdx = Idx + 1;
5576 while (SubConcatEnd < RealVals)
5577 SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
5578 while (SubConcatEnd < OpsToConcat)
5579 SubConcatOps[SubConcatEnd++] = undefVec;
5580 ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
5581 NextVT, SubConcatOps);
5582 ConcatEnd = SubConcatIdx + 1;
5583 }
5584 }
5585
5586 // Check to see if we have a single operation with the widen type.
5587 if (ConcatEnd == 1) {
5588 VT = ConcatOps[0].getValueType();
5589 if (VT == WidenVT)
5590 return ConcatOps[0];
5591 }
5592
5593 // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
5594 unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
5595 if (NumOps != ConcatEnd ) {
5596 SDValue UndefVal = DAG.getPOISON(MaxVT);
5597 for (unsigned j = ConcatEnd; j < NumOps; ++j)
5598 ConcatOps[j] = UndefVal;
5599 }
5600 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
5601 ArrayRef(ConcatOps.data(), NumOps));
5602}
5603
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
  // Binary op widening for operations that can trap.
  // Widened lanes hold garbage, so we must not execute the op on them; we
  // instead tile the original elements with progressively smaller legal
  // vector pieces (or a VP op, or full scalarization) and reassemble.
  unsigned Opcode = N->getOpcode();
  SDLoc dl(N);
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  EVT WidenEltVT = WidenVT.getVectorElementType();
  EVT VT = WidenVT;
  unsigned NumElts = VT.getVectorMinNumElements();
  const SDNodeFlags Flags = N->getFlags();
  // Shrink VT until it is a legal type (or a single element).
  while (!TLI.isTypeLegal(VT) && NumElts != 1) {
    NumElts = NumElts / 2;
    VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
  }

  if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
    // Operation doesn't trap so just widen as normal.
    SDValue InOp1 = GetWidenedVector(N->getOperand(0));
    SDValue InOp2 = GetWidenedVector(N->getOperand(1));
    return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
  }

  // Generate a vp.op if it is custom/legal for the target. This avoids need
  // to split and tile the subvectors (below), because the inactive lanes can
  // simply be disabled. To avoid possible recursion, only do this if the
  // widened mask type is legal.
  if (auto VPOpcode = ISD::getVPForBaseOpcode(Opcode);
      VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WidenVT)) {
    if (EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                          WidenVT.getVectorElementCount());
        TLI.isTypeLegal(WideMaskVT)) {
      SDValue InOp1 = GetWidenedVector(N->getOperand(0));
      SDValue InOp2 = GetWidenedVector(N->getOperand(1));
      SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
      // EVL is the ORIGINAL element count, so the garbage lanes are inactive.
      SDValue EVL =
          DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
                              N->getValueType(0).getVectorElementCount());
      return DAG.getNode(*VPOpcode, dl, WidenVT, InOp1, InOp2, Mask, EVL,
                         Flags);
    }
  }

  // FIXME: Improve support for scalable vectors.
  assert(!VT.isScalableVector() && "Scalable vectors not handled yet.");

  // No legal vector version so unroll the vector operation and then widen.
  if (NumElts == 1)
    return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());

  // Since the operation can trap, apply operation on the original vector.
  EVT MaxVT = VT;
  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
  unsigned CurNumElts = N->getValueType(0).getVectorNumElements();

  SmallVector<SDValue, 16> ConcatOps(CurNumElts);
  unsigned ConcatEnd = 0;  // Current ConcatOps index.
  int Idx = 0;        // Current Idx into input vectors.

  // NumElts := greatest legal vector size (at most WidenVT)
  // while (orig. vector has unhandled elements) {
  //   take munches of size NumElts from the beginning and add to ConcatOps
  //   NumElts := next smaller supported vector size or 1
  // }
  while (CurNumElts != 0) {
    while (CurNumElts >= NumElts) {
      SDValue EOp1 = DAG.getExtractSubvector(dl, VT, InOp1, Idx);
      SDValue EOp2 = DAG.getExtractSubvector(dl, VT, InOp2, Idx);
      ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
      Idx += NumElts;
      CurNumElts -= NumElts;
    }
    // Step down to the next smaller legal vector size.
    do {
      NumElts = NumElts / 2;
      VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
    } while (!TLI.isTypeLegal(VT) && NumElts != 1);

    if (NumElts == 1) {
      // Handle the leftover elements one scalar at a time.
      for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
        SDValue EOp1 = DAG.getExtractVectorElt(dl, WidenEltVT, InOp1, Idx);
        SDValue EOp2 = DAG.getExtractVectorElt(dl, WidenEltVT, InOp2, Idx);
        ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
                                             EOp1, EOp2, Flags);
      }
      CurNumElts = 0;
    }
  }

  return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
}
5693
5694SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
5695 switch (N->getOpcode()) {
5696 case ISD::STRICT_FSETCC:
5698 return WidenVecRes_STRICT_FSETCC(N);
5705 return WidenVecRes_Convert_StrictFP(N);
5706 default:
5707 break;
5708 }
5709
5710 // StrictFP op widening for operations that can trap.
5711 unsigned NumOpers = N->getNumOperands();
5712 unsigned Opcode = N->getOpcode();
5713 SDLoc dl(N);
5714 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
5715 EVT WidenEltVT = WidenVT.getVectorElementType();
5716 EVT VT = WidenVT;
5717 unsigned NumElts = VT.getVectorNumElements();
5718 while (!TLI.isTypeLegal(VT) && NumElts != 1) {
5719 NumElts = NumElts / 2;
5720 VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
5721 }
5722
5723 // No legal vector version so unroll the vector operation and then widen.
5724 if (NumElts == 1)
5725 return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements());
5726
5727 // Since the operation can trap, apply operation on the original vector.
5728 EVT MaxVT = VT;
5730 unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
5731
5732 SmallVector<SDValue, 16> ConcatOps(CurNumElts);
5734 unsigned ConcatEnd = 0; // Current ConcatOps index.
5735 int Idx = 0; // Current Idx into input vectors.
5736
5737 // The Chain is the first operand.
5738 InOps.push_back(N->getOperand(0));
5739
5740 // Now process the remaining operands.
5741 for (unsigned i = 1; i < NumOpers; ++i) {
5742 SDValue Oper = N->getOperand(i);
5743
5744 EVT OpVT = Oper.getValueType();
5745 if (OpVT.isVector()) {
5746 if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector)
5747 Oper = GetWidenedVector(Oper);
5748 else {
5749 EVT WideOpVT =
5750 EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(),
5751 WidenVT.getVectorElementCount());
5752 Oper = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5753 DAG.getPOISON(WideOpVT), Oper,
5754 DAG.getVectorIdxConstant(0, dl));
5755 }
5756 }
5757
5758 InOps.push_back(Oper);
5759 }
5760
5761 // NumElts := greatest legal vector size (at most WidenVT)
5762 // while (orig. vector has unhandled elements) {
5763 // take munches of size NumElts from the beginning and add to ConcatOps
5764 // NumElts := next smaller supported vector size or 1
5765 // }
5766 while (CurNumElts != 0) {
5767 while (CurNumElts >= NumElts) {
5769
5770 for (unsigned i = 0; i < NumOpers; ++i) {
5771 SDValue Op = InOps[i];
5772
5773 EVT OpVT = Op.getValueType();
5774 if (OpVT.isVector()) {
5775 EVT OpExtractVT =
5776 EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(),
5778 Op = DAG.getExtractSubvector(dl, OpExtractVT, Op, Idx);
5779 }
5780
5781 EOps.push_back(Op);
5782 }
5783
5784 EVT OperVT[] = {VT, MVT::Other};
5785 SDValue Oper = DAG.getNode(Opcode, dl, OperVT, EOps);
5786 ConcatOps[ConcatEnd++] = Oper;
5787 Chains.push_back(Oper.getValue(1));
5788 Idx += NumElts;
5789 CurNumElts -= NumElts;
5790 }
5791 do {
5792 NumElts = NumElts / 2;
5793 VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
5794 } while (!TLI.isTypeLegal(VT) && NumElts != 1);
5795
5796 if (NumElts == 1) {
5797 for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
5799
5800 for (unsigned i = 0; i < NumOpers; ++i) {
5801 SDValue Op = InOps[i];
5802
5803 EVT OpVT = Op.getValueType();
5804 if (OpVT.isVector())
5805 Op = DAG.getExtractVectorElt(dl, OpVT.getVectorElementType(), Op,
5806 Idx);
5807
5808 EOps.push_back(Op);
5809 }
5810
5811 EVT WidenVT[] = {WidenEltVT, MVT::Other};
5812 SDValue Oper = DAG.getNode(Opcode, dl, WidenVT, EOps);
5813 ConcatOps[ConcatEnd++] = Oper;
5814 Chains.push_back(Oper.getValue(1));
5815 }
5816 CurNumElts = 0;
5817 }
5818 }
5819
5820 // Build a factor node to remember all the Ops that have been created.
5821 SDValue NewChain;
5822 if (Chains.size() == 1)
5823 NewChain = Chains[0];
5824 else
5825 NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
5826 ReplaceValueWith(SDValue(N, 1), NewChain);
5827
5828 return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
5829}
5830
5831SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
5832 SDLoc DL(N);
5833 EVT ResVT = N->getValueType(0);
5834 EVT OvVT = N->getValueType(1);
5835 EVT WideResVT, WideOvVT;
5836 SDValue WideLHS, WideRHS;
5837
5838 // TODO: This might result in a widen/split loop.
5839 if (ResNo == 0) {
5840 WideResVT = TLI.getTypeToTransformTo(*DAG.getContext(), ResVT);
5841 WideOvVT = EVT::getVectorVT(
5842 *DAG.getContext(), OvVT.getVectorElementType(),
5843 WideResVT.getVectorNumElements());
5844
5845 WideLHS = GetWidenedVector(N->getOperand(0));
5846 WideRHS = GetWidenedVector(N->getOperand(1));
5847 } else {
5848 WideOvVT = TLI.getTypeToTransformTo(*DAG.getContext(), OvVT);
5849 WideResVT = EVT::getVectorVT(
5850 *DAG.getContext(), ResVT.getVectorElementType(),
5851 WideOvVT.getVectorNumElements());
5852
5853 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
5854 SDValue Poison = DAG.getPOISON(WideResVT);
5855
5856 WideLHS = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideResVT, Poison,
5857 N->getOperand(0), Zero);
5858 WideRHS = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideResVT, Poison,
5859 N->getOperand(1), Zero);
5860 }
5861
5862 SDVTList WideVTs = DAG.getVTList(WideResVT, WideOvVT);
5863 SDNode *WideNode = DAG.getNode(
5864 N->getOpcode(), DL, WideVTs, WideLHS, WideRHS).getNode();
5865
5866 // Replace the other vector result not being explicitly widened here.
5867 unsigned OtherNo = 1 - ResNo;
5868 EVT OtherVT = N->getValueType(OtherNo);
5869 if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) {
5870 SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo));
5871 } else {
5872 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
5873 SDValue OtherVal = DAG.getNode(
5874 ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero);
5875 ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
5876 }
5877
5878 return SDValue(WideNode, ResNo);
5879}
5880
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
  // Widen conversion-style ops (extend/truncate, fp<->int, fp round/extend,
  // and their VP forms). Tries, in order: emit directly on widened input,
  // use *_EXTEND_VECTOR_INREG, widen/shrink the input to a legal type, and
  // finally scalarize.
  LLVMContext &Ctx = *DAG.getContext();
  SDValue InOp = N->getOperand(0);
  SDLoc DL(N);

  EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));
  ElementCount WidenEC = WidenVT.getVectorElementCount();

  EVT InVT = InOp.getValueType();

  unsigned Opcode = N->getOpcode();
  const SDNodeFlags Flags = N->getFlags();

  // Handle the case of ZERO_EXTEND where the promoted InVT element size does
  // not equal that of WidenVT.
  if (N->getOpcode() == ISD::ZERO_EXTEND &&
      getTypeAction(InVT) == TargetLowering::TypePromoteInteger &&
      TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() !=
          WidenVT.getScalarSizeInBits()) {
    InOp = ZExtPromotedInteger(InOp);
    InVT = InOp.getValueType();
    // If promotion made the input wider than the result, this became a
    // truncate instead.
    if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits())
      Opcode = ISD::TRUNCATE;
  }

  EVT InEltVT = InVT.getVectorElementType();
  EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC);
  ElementCount InVTEC = InVT.getVectorElementCount();

  if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
    InOp = GetWidenedVector(N->getOperand(0));
    InVT = InOp.getValueType();
    InVTEC = InVT.getVectorElementCount();
    if (InVTEC == WidenEC) {
      // Input widened to the same element count: emit the wide op directly.
      if (N->getNumOperands() == 1)
        return DAG.getNode(Opcode, DL, WidenVT, InOp, Flags);
      if (N->getNumOperands() == 3) {
        assert(N->isVPOpcode() && "Expected VP opcode");
        SDValue Mask =
            GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
        return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, N->getOperand(2));
      }
      return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
    }
    if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
      // If both input and result vector types are of same width, extend
      // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
      // accepts fewer elements in the result than in the input.
      if (Opcode == ISD::ANY_EXTEND)
        return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
      if (Opcode == ISD::SIGN_EXTEND)
        return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
      if (Opcode == ISD::ZERO_EXTEND)
        return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
    }

    // For TRUNCATE, try to widen using the legal EC of the input type instead
    // if the legalisation action for that intermediate type is not widening.
    // E.g. for trunc nxv1i64 -> nxv1i8 where
    //  - nxv1i64 input gets widened to nxv2i64
    //  - nxv1i8 output gets widened to nxv16i8
    // Then one can try widening the result to nxv2i8 (instead of going all the
    // way to nxv16i8) if this later allows type promotion.
    EVT MidResVT =
        EVT::getVectorVT(Ctx, WidenVT.getVectorElementType(), InVTEC);
    if (N->getOpcode() == ISD::TRUNCATE &&
        getTypeAction(MidResVT) == TargetLowering::TypePromoteInteger) {
      SDValue MidRes = DAG.getNode(ISD::TRUNCATE, DL, MidResVT, InOp, Flags);
      return DAG.getInsertSubvector(DL, DAG.getPOISON(WidenVT), MidRes, 0);
    }
  }

  if (TLI.isTypeLegal(InWidenVT)) {
    // Because the result and the input are different vector types, widening
    // the result could create a legal type but widening the input might make
    // it an illegal type that might lead to repeatedly splitting the input
    // and then widening it. To avoid this, we widen the input only if
    // it results in a legal type.
    if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) {
      // Widen the input and call convert on the widened input vector.
      unsigned NumConcat =
          WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
      SmallVector<SDValue, 16> Ops(NumConcat, DAG.getPOISON(InVT));
      Ops[0] = InOp;
      SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
      if (N->getNumOperands() == 1)
        return DAG.getNode(Opcode, DL, WidenVT, InVec, Flags);
      return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
    }

    if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) {
      SDValue InVal = DAG.getExtractSubvector(DL, InWidenVT, InOp, 0);
      // Extract the input and convert the shorten input vector.
      if (N->getNumOperands() == 1)
        return DAG.getNode(Opcode, DL, WidenVT, InVal, Flags);
      return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
    }
  }

  // Otherwise unroll into some nasty scalar code and rebuild the vector.
  EVT EltVT = WidenVT.getVectorElementType();
  SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getPOISON(EltVT));
  // Use the original element count so we don't do more scalar opts than
  // necessary.
  unsigned MinElts = N->getValueType(0).getVectorNumElements();
  for (unsigned i=0; i < MinElts; ++i) {
    SDValue Val = DAG.getExtractVectorElt(DL, InEltVT, InOp, i);
    if (N->getNumOperands() == 1)
      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, Flags);
    else
      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
  }

  return DAG.getBuildVector(WidenVT, DL, Ops);
}
5996
5997SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) {
5998 SDLoc dl(N);
5999 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6000 ElementCount WidenNumElts = WidenVT.getVectorElementCount();
6001
6002 SDValue Src = N->getOperand(0);
6003 EVT SrcVT = Src.getValueType();
6004
6005 // Also widen the input.
6006 if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) {
6007 Src = GetWidenedVector(Src);
6008 SrcVT = Src.getValueType();
6009 }
6010
6011 // Input and output not widened to the same size, give up.
6012 if (WidenNumElts != SrcVT.getVectorElementCount())
6013 return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
6014
6015 return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1));
6016}
6017
6018SDValue DAGTypeLegalizer::WidenVecRes_XROUND(SDNode *N) {
6019 SDLoc dl(N);
6020 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6021 ElementCount WidenNumElts = WidenVT.getVectorElementCount();
6022
6023 SDValue Src = N->getOperand(0);
6024 EVT SrcVT = Src.getValueType();
6025
6026 // Also widen the input.
6027 if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) {
6028 Src = GetWidenedVector(Src);
6029 SrcVT = Src.getValueType();
6030 }
6031
6032 // Input and output not widened to the same size, give up.
6033 if (WidenNumElts != SrcVT.getVectorElementCount())
6034 return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
6035
6036 if (N->getNumOperands() == 1)
6037 return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
6038
6039 assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
6040 assert(N->isVPOpcode() && "Expected VP opcode");
6041
6042 SDValue Mask =
6043 GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
6044 return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, Mask, N->getOperand(2));
6045}
6046
6047SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
6048 SDValue InOp = N->getOperand(1);
6049 SDLoc DL(N);
6050 SmallVector<SDValue, 4> NewOps(N->ops());
6051
6052 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6053 unsigned WidenNumElts = WidenVT.getVectorNumElements();
6054
6055 EVT InVT = InOp.getValueType();
6056 EVT InEltVT = InVT.getVectorElementType();
6057
6058 unsigned Opcode = N->getOpcode();
6059
6060 // FIXME: Optimizations need to be implemented here.
6061
6062 // Otherwise unroll into some nasty scalar code and rebuild the vector.
6063 EVT EltVT = WidenVT.getVectorElementType();
6064 std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}};
6065 SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getPOISON(EltVT));
6066 SmallVector<SDValue, 32> OpChains;
6067 // Use the original element count so we don't do more scalar opts than
6068 // necessary.
6069 unsigned MinElts = N->getValueType(0).getVectorNumElements();
6070 for (unsigned i=0; i < MinElts; ++i) {
6071 NewOps[1] = DAG.getExtractVectorElt(DL, InEltVT, InOp, i);
6072 Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
6073 OpChains.push_back(Ops[i].getValue(1));
6074 }
6075 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains);
6076 ReplaceValueWith(SDValue(N, 1), NewChain);
6077
6078 return DAG.getBuildVector(WidenVT, DL, Ops);
6079}
6080
6081SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
6082 unsigned Opcode = N->getOpcode();
6083 SDValue InOp = N->getOperand(0);
6084 SDLoc DL(N);
6085
6086 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6087 EVT WidenSVT = WidenVT.getVectorElementType();
6088 unsigned WidenNumElts = WidenVT.getVectorNumElements();
6089
6090 EVT InVT = InOp.getValueType();
6091 EVT InSVT = InVT.getVectorElementType();
6092 unsigned InVTNumElts = InVT.getVectorNumElements();
6093
6094 if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
6095 InOp = GetWidenedVector(InOp);
6096 InVT = InOp.getValueType();
6097 if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) {
6098 switch (Opcode) {
6102 return DAG.getNode(Opcode, DL, WidenVT, InOp);
6103 }
6104 }
6105 }
6106
6107 // Unroll, extend the scalars and rebuild the vector.
6109 for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) {
6110 SDValue Val = DAG.getExtractVectorElt(DL, InSVT, InOp, i);
6111 switch (Opcode) {
6113 Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val);
6114 break;
6116 Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val);
6117 break;
6119 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val);
6120 break;
6121 default:
6122 llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected");
6123 }
6124 Ops.push_back(Val);
6125 }
6126
6127 while (Ops.size() != WidenNumElts)
6128 Ops.push_back(DAG.getPOISON(WidenSVT));
6129
6130 return DAG.getBuildVector(WidenVT, DL, Ops);
6131}
6132
6133SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
6134 // If this is an FCOPYSIGN with same input types, we can treat it as a
6135 // normal (can trap) binary op.
6136 if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType())
6137 return WidenVecRes_BinaryCanTrap(N);
6138
6139 // If the types are different, fall back to unrolling.
6140 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6141 return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
6142}
6143
6144/// Result and first source operand are different scalar types, but must have
6145/// the same number of elements. There is an additional control argument which
6146/// should be passed through unchanged.
6147SDValue DAGTypeLegalizer::WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N) {
6148 SDValue FpValue = N->getOperand(0);
6149 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6150 if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector)
6151 return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
6152 SDValue Arg = GetWidenedVector(FpValue);
6153 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)},
6154 N->getFlags());
6155}
6156
6157SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) {
6158 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6159 SDValue InOp = GetWidenedVector(N->getOperand(0));
6160 SDValue RHS = N->getOperand(1);
6161 EVT ExpVT = RHS.getValueType();
6162 SDValue ExpOp = RHS;
6163 if (ExpVT.isVector()) {
6164 EVT WideExpVT = WidenVT.changeVectorElementType(
6165 *DAG.getContext(), ExpVT.getVectorElementType());
6166 ExpOp = ModifyToType(RHS, WideExpVT);
6167 }
6168
6169 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ExpOp);
6170}
6171
6172SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
6173 // Unary op widening.
6174 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6175 SDValue InOp = GetWidenedVector(N->getOperand(0));
6176 if (N->getNumOperands() == 1)
6177 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, N->getFlags());
6178 if (N->getOpcode() == ISD::AssertNoFPClass)
6179 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp,
6180 N->getOperand(1), N->getFlags());
6181
6182 assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
6183 assert(N->isVPOpcode() && "Expected VP opcode");
6184
6185 SDValue Mask =
6186 GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
6187 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT,
6188 {InOp, Mask, N->getOperand(2)});
6189}
6190
6191SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
6192 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6193 EVT ExtVT = EVT::getVectorVT(
6194 *DAG.getContext(),
6195 cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType(),
6196 WidenVT.getVectorElementCount());
6197 SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
6198 return DAG.getNode(N->getOpcode(), SDLoc(N),
6199 WidenVT, WidenLHS, DAG.getValueType(ExtVT));
6200}
6201
// Widen a unary node that produces two vector results (both results are
// rebuilt with the widened element count, and the results other than ResNo
// are replaced on the original node).
// NOTE(review): one line of the assertion (around 6208) is missing from this
// listing — presumably the element-count equality check matching the message
// below. Verify against the original source.
6202SDValue DAGTypeLegalizer::WidenVecRes_UnaryOpWithTwoResults(SDNode *N,
6203 unsigned ResNo) {
6204 EVT VT0 = N->getValueType(0);
6205 EVT VT1 = N->getValueType(1);
6206
6207 assert(VT0.isVector() && VT1.isVector() &&
6209 "expected both results to be vectors of matching element count");
6210
6211 LLVMContext &Ctx = *DAG.getContext();
6212 SDValue InOp = GetWidenedVector(N->getOperand(0));
6213
 // Both results are widened to the element count chosen for result ResNo.
6214 EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(ResNo));
6215 ElementCount WidenEC = WidenVT.getVectorElementCount();
6216
6217 EVT WidenVT0 = EVT::getVectorVT(Ctx, VT0.getVectorElementType(), WidenEC);
6218 EVT WidenVT1 = EVT::getVectorVT(Ctx, VT1.getVectorElementType(), WidenEC);
6219
6220 SDNode *WidenNode =
6221 DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT0, WidenVT1}, InOp)
6222 .getNode();
6223
 // Hook up the sibling results of N to the new node before returning ResNo.
6224 ReplaceOtherWidenResults(N, WidenNode, ResNo);
6225 return SDValue(WidenNode, ResNo);
6226}
6227
6228SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
6229 SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
6230 return GetWidenedVector(WidenVec);
6231}
6232
6233SDValue DAGTypeLegalizer::WidenVecRes_ADDRSPACECAST(SDNode *N) {
6234 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6235 SDValue InOp = GetWidenedVector(N->getOperand(0));
6236 auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N);
6237
6238 return DAG.getAddrSpaceCast(SDLoc(N), WidenVT, InOp,
6239 AddrSpaceCastN->getSrcAddressSpace(),
6240 AddrSpaceCastN->getDestAddressSpace());
6241}
6242
// Widen the result of a BITCAST. Depending on how the input type is being
// legalized this either converts directly (same-size promoted/widened
// input), rebuilds the input as a legal wider vector and bitcasts it, or
// falls back to a store/load through a stack slot.
// NOTE(review): several lines are missing from this listing — the switch's
// case labels (around 6251, 6253, 6255, 6280-6285 and 6287, presumably the
// TargetLowering::Type* legalization actions) and a declaration around 6339
// (presumably a SmallVector<SDValue> Ops for ExtractVectorElements). Verify
// against the original source.
6243SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
6244 SDValue InOp = N->getOperand(0);
6245 EVT InVT = InOp.getValueType();
6246 EVT VT = N->getValueType(0);
6247 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
6248 SDLoc dl(N);
6249
6250 switch (getTypeAction(InVT)) {
6252 break;
6254 report_fatal_error("Scalarization of scalable vectors is not supported.");
6256 // If the incoming type is a vector that is being promoted, then
6257 // we know that the elements are arranged differently and that we
6258 // must perform the conversion using a stack slot.
6259 if (InVT.isVector())
6260 break;
6261
6262 // If the InOp is promoted to the same size, convert it. Otherwise,
6263 // fall out of the switch and widen the promoted input.
6264 SDValue NInOp = GetPromotedInteger(InOp);
6265 EVT NInVT = NInOp.getValueType();
6266 if (WidenVT.bitsEq(NInVT)) {
6267 // For big endian targets we need to shift the input integer or the
6268 // interesting bits will end up at the wrong place.
6269 if (DAG.getDataLayout().isBigEndian()) {
6270 unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits();
6271 NInOp = DAG.getNode(ISD::SHL, dl, NInVT, NInOp,
6272 DAG.getShiftAmountConstant(ShiftAmt, NInVT, dl));
6273 }
6274 return DAG.getNode(ISD::BITCAST, dl, WidenVT, NInOp);
6275 }
6276 InOp = NInOp;
6277 InVT = NInVT;
6278 break;
6279 }
6286 break;
6288 // If the InOp is widened to the same size, convert it. Otherwise, fall
6289 // out of the switch and widen the widened input.
6290 InOp = GetWidenedVector(InOp);
6291 InVT = InOp.getValueType();
6292 if (WidenVT.bitsEq(InVT))
6293 // The input widens to the same size. Convert to the widen value.
6294 return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
6295 break;
6296 }
6297
6298 unsigned WidenSize = WidenVT.getSizeInBits();
6299 unsigned InSize = InVT.getSizeInBits();
6300 unsigned InScalarSize = InVT.getScalarSizeInBits();
6301 // x86mmx is not an acceptable vector element type, so don't try.
6302 if (WidenSize % InScalarSize == 0 && InVT != MVT::x86mmx) {
6303 // Determine new input vector type. The new input vector type will use
6304 // the same element type (if its a vector) or use the input type as a
6305 // vector. It is the same size as the type to widen to.
6306 EVT NewInVT;
6307 unsigned NewNumParts = WidenSize / InSize;
6308 if (InVT.isVector()) {
6309 EVT InEltVT = InVT.getVectorElementType();
6310 NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
6311 WidenSize / InEltVT.getSizeInBits());
6312 } else {
6313 // For big endian systems, using the promoted input scalar type
6314 // to produce the scalar_to_vector would put the desired bits into
6315 // the least significant byte(s) of the wider element zero. This
6316 // will mean that the users of the result vector are using incorrect
6317 // bits. Use the original input type instead. Although either input
6318 // type can be used on little endian systems, for consistency we
6319 // use the original type there as well.
6320 EVT OrigInVT = N->getOperand(0).getValueType();
6321 NewNumParts = WidenSize / OrigInVT.getSizeInBits();
6322 NewInVT = EVT::getVectorVT(*DAG.getContext(), OrigInVT, NewNumParts);
6323 }
6324
6325 if (TLI.isTypeLegal(NewInVT)) {
6326 SDValue NewVec;
6327 if (InVT.isVector()) {
6328 // Because the result and the input are different vector types, widening
6329 // the result could create a legal type but widening the input might
6330 // make it an illegal type that might lead to repeatedly splitting the
6331 // input and then widening it. To avoid this, we widen the input only if
6332 // it results in a legal type.
6333 if (WidenSize % InSize == 0) {
 // Concatenate the input with poison vectors to reach the new size.
6334 SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getPOISON(InVT));
6335 Ops[0] = InOp;
6336
6337 NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
6338 } else {
 // Otherwise extract the scalars and pad with poison elements.
6340 DAG.ExtractVectorElements(InOp, Ops);
6341 Ops.append(WidenSize / InScalarSize - Ops.size(),
6342 DAG.getPOISON(InVT.getVectorElementType()));
6343
6344 NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
6345 }
6346 } else {
6347 NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp);
6348 }
6349 return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
6350 }
6351 }
6352
 // Last resort: round-trip the value through a stack slot.
6353 return CreateStackStoreLoad(InOp, WidenVT);
6354}
6355
6356SDValue DAGTypeLegalizer::WidenVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
6357 return DAG.getNode(
6358 N->getOpcode(), SDLoc(N),
6359 TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
6360 N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3));
6361}
6362
6363SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
6364 SDLoc dl(N);
6365 // Build a vector with poison for the new nodes.
6366 EVT VT = N->getValueType(0);
6367
6368 // Integer BUILD_VECTOR operands may be larger than the node's vector element
6369 // type. The POISONs need to have the same type as the existing operands.
6370 EVT EltVT = N->getOperand(0).getValueType();
6371 unsigned NumElts = VT.getVectorNumElements();
6372
6373 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
6374 unsigned WidenNumElts = WidenVT.getVectorNumElements();
6375
6376 SmallVector<SDValue, 16> NewOps(N->ops());
6377 assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
6378 NewOps.append(WidenNumElts - NumElts, DAG.getPOISON(EltVT));
6379
6380 return DAG.getBuildVector(WidenVT, dl, NewOps);
6381}
6382
// Widen the result of a CONCAT_VECTORS. Three strategies, in order of
// preference: pad with poison input vectors when the widened count is a
// multiple of the input count; turn a two-operand concat of widened inputs
// into a shuffle; otherwise extract every element and rebuild with a
// BUILD_VECTOR (fixed-width only).
6383SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
6384 EVT InVT = N->getOperand(0).getValueType();
6385 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6386 SDLoc dl(N);
6387 unsigned NumOperands = N->getNumOperands();
6388
6389 bool InputWidened = false; // Indicates we need to widen the input.
6390 if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
6391 unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
6392 unsigned NumInElts = InVT.getVectorMinNumElements();
6393 if (WidenNumElts % NumInElts == 0) {
6394 // Add undef vectors to widen to correct length.
6395 unsigned NumConcat = WidenNumElts / NumInElts;
6396 SDValue UndefVal = DAG.getPOISON(InVT);
6397 SmallVector<SDValue, 16> Ops(NumConcat);
6398 for (unsigned i=0; i < NumOperands; ++i)
6399 Ops[i] = N->getOperand(i);
6400 for (unsigned i = NumOperands; i != NumConcat; ++i)
6401 Ops[i] = UndefVal;
6402 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops);
6403 }
6404 } else {
6405 InputWidened = true;
6406 if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
6407 // The inputs and the result are widen to the same value.
6408 unsigned i;
6409 for (i=1; i < NumOperands; ++i)
6410 if (!N->getOperand(i).isUndef())
6411 break;
6412
6413 if (i == NumOperands)
6414 // Everything but the first operand is an UNDEF so just return the
6415 // widened first operand.
6416 return GetWidenedVector(N->getOperand(0));
6417
6418 if (NumOperands == 2) {
6419 assert(!WidenVT.isScalableVector() &&
6420 "Cannot use vector shuffles to widen CONCAT_VECTOR result");
6421 unsigned WidenNumElts = WidenVT.getVectorNumElements();
6422 unsigned NumInElts = InVT.getVectorNumElements();
6423
6424 // Replace concat of two operands with a shuffle.
 // The mask selects the first NumInElts lanes of each widened input;
 // the remaining widened lanes stay undefined (-1).
6425 SmallVector<int, 16> MaskOps(WidenNumElts, -1);
6426 for (unsigned i = 0; i < NumInElts; ++i) {
6427 MaskOps[i] = i;
6428 MaskOps[i + NumInElts] = i + WidenNumElts;
6429 }
6430 return DAG.getVectorShuffle(WidenVT, dl,
6431 GetWidenedVector(N->getOperand(0)),
6432 GetWidenedVector(N->getOperand(1)),
6433 MaskOps);
6434 }
6435 }
6436 }
6437
6438 assert(!WidenVT.isScalableVector() &&
6439 "Cannot use build vectors to widen CONCAT_VECTOR result");
6440 unsigned WidenNumElts = WidenVT.getVectorNumElements();
6441 unsigned NumInElts = InVT.getVectorNumElements();
6442
6443 // Fall back to use extracts and build vector.
6444 EVT EltVT = WidenVT.getVectorElementType();
6445 SmallVector<SDValue, 16> Ops(WidenNumElts);
6446 unsigned Idx = 0;
6447 for (unsigned i=0; i < NumOperands; ++i) {
6448 SDValue InOp = N->getOperand(i);
6449 if (InputWidened)
6450 InOp = GetWidenedVector(InOp);
6451 for (unsigned j = 0; j < NumInElts; ++j)
6452 Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j);
6453 }
 // Pad the tail of the widened result with poison elements.
6454 SDValue UndefVal = DAG.getPOISON(EltVT);
6455 for (; Idx < WidenNumElts; ++Idx)
6456 Ops[Idx] = UndefVal;
6457 return DAG.getBuildVector(WidenVT, dl, Ops);
6458}
6459
6460SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) {
6461 EVT VT = N->getValueType(0);
6462 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
6463 SDValue InOp1 = GetWidenedVector(N->getOperand(0));
6464 SDValue InOp2 = N->getOperand(1);
6465 SDValue Idx = N->getOperand(2);
6466 SDLoc dl(N);
6467 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx);
6468}
6469
// Widen the result of an EXTRACT_SUBVECTOR. Tries, in order: returning the
// (widened) input directly, extracting a widened subvector, splitting a
// scalable extract into GCD-sized pieces, spilling through the stack for
// scalable vectors, and finally per-element extraction for fixed vectors.
// NOTE(review): a few lines are missing from this listing — the element
// count argument of the PartVT getVectorVT call (around 6512), the
// declaration of `Parts` (around 6515, presumably a SmallVector<SDValue>),
// and the argument lists of the two getMachineMemOperand calls (around
// 6535-6536 and 6538-6539). Verify against the original source.
6470SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
6471 EVT VT = N->getValueType(0);
6472 EVT EltVT = VT.getVectorElementType();
6473 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
6474 SDValue InOp = N->getOperand(0);
6475 SDValue Idx = N->getOperand(1);
6476 SDLoc dl(N);
6477
6478 auto InOpTypeAction = getTypeAction(InOp.getValueType());
6479 if (InOpTypeAction == TargetLowering::TypeWidenVector)
6480 InOp = GetWidenedVector(InOp);
6481
6482 EVT InVT = InOp.getValueType();
6483
6484 // Check if we can just return the input vector after widening.
6485 uint64_t IdxVal = Idx->getAsZExtVal();
6486 if (IdxVal == 0 && InVT == WidenVT)
6487 return InOp;
6488
6489 // Check if we can extract from the vector.
6490 unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
6491 unsigned InNumElts = InVT.getVectorMinNumElements();
6492 unsigned VTNumElts = VT.getVectorMinNumElements();
6493 assert(IdxVal % VTNumElts == 0 &&
6494 "Expected Idx to be a multiple of subvector minimum vector length");
6495 if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
6496 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
6497
6498 if (VT.isScalableVector()) {
6499 // Try to split the operation up into smaller extracts and concat the
6500 // results together, e.g.
6501 // nxv6i64 extract_subvector(nxv12i64, 6)
6502 // <->
6503 // nxv8i64 concat(
6504 // nxv2i64 extract_subvector(nxv16i64, 6)
6505 // nxv2i64 extract_subvector(nxv16i64, 8)
6506 // nxv2i64 extract_subvector(nxv16i64, 10)
6507 // undef)
6508 unsigned GCD = std::gcd(VTNumElts, WidenNumElts);
6509 assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
6510 "down type's element count");
6511 EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
6513 // Avoid recursion around e.g. nxv1i8.
6514 if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {
6516 unsigned I = 0;
6517 for (; I < VTNumElts / GCD; ++I)
6518 Parts.push_back(
6519 DAG.getExtractSubvector(dl, PartVT, InOp, IdxVal + I * GCD));
 // Pad out to the widened element count with poison parts.
6520 for (; I < WidenNumElts / GCD; ++I)
6521 Parts.push_back(DAG.getPOISON(PartVT));
6522
6523 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
6524 }
6525
6526 // Fallback to extracting through memory.
6527
6528 Align Alignment = DAG.getReducedAlign(InVT, /*UseABI=*/false);
6529 SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Alignment);
6530 MachineFunction &MF = DAG.getMachineFunction();
6531 int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6532 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
6533
6534 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
6537 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
6540
6541 // Write out the input vector.
6542 SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, StoreMMO);
6543
6544 // Build a mask to match the length of the non-widened result.
6545 SDValue Mask =
6546 DAG.getMaskFromElementCount(dl, WidenVT, VT.getVectorElementCount());
6547
6548 // Read back the sub-vector setting the remaining lanes to poison.
6549 StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, InVT, VT, Idx);
6550 return DAG.getMaskedLoad(
6551 WidenVT, dl, Ch, StackPtr, DAG.getPOISON(StackPtr.getValueType()), Mask,
6552 DAG.getPOISON(WidenVT), VT, LoadMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
6553 }
6554
6555 // We could try widening the input to the right length but for now, extract
6556 // the original elements, fill the rest with undefs and build a vector.
6557 SmallVector<SDValue, 16> Ops(WidenNumElts);
6558 unsigned i;
6559 for (i = 0; i < VTNumElts; ++i)
6560 Ops[i] = DAG.getExtractVectorElt(dl, EltVT, InOp, IdxVal + i);
6561
6562 SDValue UndefVal = DAG.getPOISON(EltVT);
6563 for (; i < WidenNumElts; ++i)
6564 Ops[i] = UndefVal;
6565 return DAG.getBuildVector(WidenVT, dl, Ops);
6566}
6567
6568SDValue DAGTypeLegalizer::WidenVecRes_AssertZext(SDNode *N) {
6569 SDValue InOp = ModifyToType(
6570 N->getOperand(0),
6571 TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), true);
6572 return DAG.getNode(ISD::AssertZext, SDLoc(N), InOp.getValueType(), InOp,
6573 N->getOperand(1));
6574}
6575
6576SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
6577 SDValue InOp = GetWidenedVector(N->getOperand(0));
6578 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
6579 InOp.getValueType(), InOp,
6580 N->getOperand(1), N->getOperand(2));
6581}
6582
6583/// Either return the same load or provide appropriate casts
6584/// from the load and return that.
6585static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
6586 TypeSize LdWidth, TypeSize FirstVTWidth,
6587 SDLoc dl, SelectionDAG &DAG) {
6588 assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth) &&
6589 "Load width must be less than or equal to first value type width");
6590 TypeSize WidenWidth = WidenVT.getSizeInBits();
6591 if (!FirstVT.isVector()) {
6592 unsigned NumElts =
6593 WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
6594 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
6595 SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
6596 return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
6597 }
6598 assert(FirstVT == WidenVT && "First value type must equal widen value type");
6599 return LdOp;
6600}
6601
6602static std::optional<EVT> findMemType(SelectionDAG &DAG,
6603 const TargetLowering &TLI, unsigned Width,
6604 EVT WidenVT, unsigned Align,
6605 unsigned WidenEx);
6606
6607SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
6608 EVT WidenVT =
6609 TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
6610 EVT LdVT = LD->getMemoryVT();
6611 SDLoc dl(LD);
6612 assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors");
6613 assert(LdVT.isScalableVector() == WidenVT.isScalableVector() &&
6614 "Must be scalable");
6615 assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() &&
6616 "Expected equivalent element types");
6617
6618 // Load information
6619 SDValue Chain = LD->getChain();
6620 SDValue BasePtr = LD->getBasePtr();
6621
6622 TypeSize LdWidth = LdVT.getSizeInBits();
6623 TypeSize WidenWidth = WidenVT.getSizeInBits();
6624 TypeSize WidthDiff = WidenWidth - LdWidth;
6625
6626 // Find the vector type that can load from.
6627 std::optional<EVT> FirstVT =
6628 findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0,
6629 WidthDiff.getKnownMinValue());
6630
6631 if (!FirstVT)
6632 return SDValue();
6633
6634 SmallVector<EVT, 8> MemVTs;
6635 TypeSize FirstVTWidth = FirstVT->getSizeInBits();
6636
6637 SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT,
6638 Chain, BasePtr, LD->getMemOperand());
6639
6640 // Load the element with one instruction.
6641 SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth,
6642 FirstVTWidth, dl, DAG);
6643
6644 // Modified the chain - switch anything that used the old chain to use
6645 // the new one.
6646 ReplaceValueWith(SDValue(LD, 1), LdOp.getValue(1));
6647 return Result;
6648}
6649
// Widen the result of a (possibly extending) vector load. Strategies, in
// order: scalarize non-byte-sized memory types, emit a VP_LOAD where the
// target supports it, generate a series of widened loads, and finally fall
// back to a wide masked load.
// NOTE(review): two lines are missing from this listing — the element-count
// argument of getElementCount (around 6683) and a declaration around 6696
// (presumably `SDValue Result;`). Verify against the original source.
6650SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
6651 LoadSDNode *LD = cast<LoadSDNode>(N);
6652 ISD::LoadExtType ExtType = LD->getExtensionType();
6653
6654 // A vector must always be stored in memory as-is, i.e. without any padding
6655 // between the elements, since various code depend on it, e.g. in the
6656 // handling of a bitcast of a vector type to int, which may be done with a
6657 // vector store followed by an integer load. A vector that does not have
6658 // elements that are byte-sized must therefore be stored as an integer
6659 // built out of the extracted vector elements.
6660 if (!LD->getMemoryVT().isByteSized()) {
6661 SDValue Value, NewChain;
6662 std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
6663 ReplaceValueWith(SDValue(LD, 0), Value);
6664 ReplaceValueWith(SDValue(LD, 1), NewChain);
6665 return SDValue();
6666 }
6667
6668 // Generate a vector-predicated load if it is custom/legal on the target. To
6669 // avoid possible recursion, only do this if the widened mask type is legal.
6670 // FIXME: Not all targets may support EVL in VP_LOAD. These will have been
6671 // removed from the IR by the ExpandVectorPredication pass but we're
6672 // reintroducing them here.
6673 EVT VT = LD->getValueType(0);
6674 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
6675 EVT WideMaskVT = getSetCCResultType(WideVT);
6676
6677 if (ExtType == ISD::NON_EXTLOAD &&
6678 TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
6679 TLI.isTypeLegal(WideMaskVT)) {
6680 SDLoc DL(N);
 // All lanes enabled; the EVL operand limits the load to the original
 // element count.
6681 SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
6682 SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
6684 SDValue NewLoad =
6685 DAG.getLoadVP(LD->getAddressingMode(), ISD::NON_EXTLOAD, WideVT, DL,
6686 LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask,
6687 EVL, LD->getMemoryVT(), LD->getMemOperand());
6688
6689 // Modified the chain - switch anything that used the old chain to use
6690 // the new one.
6691 ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
6692
6693 return NewLoad;
6694 }
6695
6697 SmallVector<SDValue, 16> LdChain; // Chain for the series of load
6698 if (ExtType != ISD::NON_EXTLOAD)
6699 Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
6700 else
6701 Result = GenWidenVectorLoads(LdChain, LD);
6702
6703 if (Result) {
6704 // If we generate a single load, we can use that for the chain. Otherwise,
6705 // build a factor node to remember the multiple loads are independent and
6706 // chain to that.
6707 SDValue NewChain;
6708 if (LdChain.size() == 1)
6709 NewChain = LdChain[0];
6710 else
6711 NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
6712
6713 // Modified the chain - switch anything that used the old chain to use
6714 // the new one.
6715 ReplaceValueWith(SDValue(N, 1), NewChain);
6716
6717 return Result;
6718 }
6719
6720 if (VT.isVector()) {
6721 // If all else fails replace the load with a wide masked load.
6722 SDLoc DL(N);
6723 SDValue Mask =
6724 DAG.getMaskFromElementCount(DL, WideVT, VT.getVectorElementCount());
6725
6726 SDValue NewLoad = DAG.getMaskedLoad(
6727 WideVT, DL, LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask,
6728 DAG.getPOISON(WideVT), LD->getMemoryVT(), LD->getMemOperand(),
6729 LD->getAddressingMode(), LD->getExtensionType());
6730
6731 ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
6732 return NewLoad;
6733 }
6734
6735 report_fatal_error("Unable to widen vector load");
6736}
6737
// Widen the result of a VP_LOAD: widen the mask, keep the EVL, and rebuild
// the load with the widened result type.
// NOTE(review): one line of the first assertion (around 6747) is missing
// from this listing — presumably the TargetLowering::TypeWidenVector
// comparison. Verify against the original source.
6738SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) {
6739 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6740 SDValue Mask = N->getMask();
6741 SDValue EVL = N->getVectorLength();
6742 ISD::LoadExtType ExtType = N->getExtensionType();
6743 SDLoc dl(N);
6744
6745 // The mask should be widened as well
6746 assert(getTypeAction(Mask.getValueType()) ==
6748 "Unable to widen binary VP op");
6749 Mask = GetWidenedVector(Mask);
6750 assert(Mask.getValueType().getVectorElementCount() ==
6751 TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType())
6752 .getVectorElementCount() &&
6753 "Unable to widen vector load");
6754
6755 SDValue Res =
6756 DAG.getLoadVP(N->getAddressingMode(), ExtType, WidenVT, dl, N->getChain(),
6757 N->getBasePtr(), N->getOffset(), Mask, EVL,
6758 N->getMemoryVT(), N->getMemOperand(), N->isExpandingLoad());
6759 // Legalize the chain result - switch anything that used the old chain to
6760 // use the new one.
6761 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
6762 return Res;
6763}
6764
// Widen the result of a fault-first VP load: widen the mask, rebuild the
// load, and replace both the chain and the faulting-lane-count results.
// NOTE(review): one line of the first assertion (around 6773) is missing
// from this listing — presumably the TargetLowering::TypeWidenVector
// comparison. Verify against the original source.
6765SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD_FF(VPLoadFFSDNode *N) {
6766 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6767 SDValue Mask = N->getMask();
6768 SDValue EVL = N->getVectorLength();
6769 SDLoc dl(N);
6770
6771 // The mask should be widened as well
6772 assert(getTypeAction(Mask.getValueType()) ==
6774 "Unable to widen binary VP op");
6775 Mask = GetWidenedVector(Mask);
6776 assert(Mask.getValueType().getVectorElementCount() ==
6777 TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType())
6778 .getVectorElementCount() &&
6779 "Unable to widen vector load");
6780
6781 SDValue Res = DAG.getLoadFFVP(WidenVT, dl, N->getChain(), N->getBasePtr(),
6782 Mask, EVL, N->getMemOperand());
 // This node has two non-data results (chain and fault count): rewire both.
6783 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
6784 ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
6785 return Res;
6786}
6787
// Widen the result of a VP strided load: widen the mask to the widened
// element count and rebuild the load with all other operands unchanged.
// NOTE(review): one line of the assertion (around 6794) is missing from this
// listing — presumably the TargetLowering::TypeWidenVector comparison.
// Verify against the original source.
6788SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) {
6789 SDLoc DL(N);
6790
6791 // The mask should be widened as well
6792 SDValue Mask = N->getMask();
6793 assert(getTypeAction(Mask.getValueType()) ==
6795 "Unable to widen VP strided load");
6796 Mask = GetWidenedVector(Mask);
6797
6798 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6799 assert(Mask.getValueType().getVectorElementCount() ==
6800 WidenVT.getVectorElementCount() &&
6801 "Data and mask vectors should have the same number of elements");
6802
6803 SDValue Res = DAG.getStridedLoadVP(
6804 N->getAddressingMode(), N->getExtensionType(), WidenVT, DL, N->getChain(),
6805 N->getBasePtr(), N->getOffset(), N->getStride(), Mask,
6806 N->getVectorLength(), N->getMemoryVT(), N->getMemOperand(),
6807 N->isExpandingLoad());
6808
6809 // Legalize the chain result - switch anything that used the old chain to
6810 // use the new one.
6811 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
6812 return Res;
6813}
6814
6815SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_COMPRESS(SDNode *N) {
6816 SDValue Vec = N->getOperand(0);
6817 SDValue Mask = N->getOperand(1);
6818 SDValue Passthru = N->getOperand(2);
6819 EVT WideVecVT =
6820 TLI.getTypeToTransformTo(*DAG.getContext(), Vec.getValueType());
6821 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
6822 Mask.getValueType().getVectorElementType(),
6823 WideVecVT.getVectorElementCount());
6824
6825 SDValue WideVec = ModifyToType(Vec, WideVecVT);
6826 SDValue WideMask = ModifyToType(Mask, WideMaskVT, /*FillWithZeroes=*/true);
6827 SDValue WidePassthru = ModifyToType(Passthru, WideVecVT);
6828 return DAG.getNode(ISD::VECTOR_COMPRESS, SDLoc(N), WideVecVT, WideVec,
6829 WideMask, WidePassthru);
6830}
6831
// Widen the result of a masked load. Prefers a VP_LOAD where the target
// supports it (merging any passthru manually for scalable vectors);
// otherwise widens the mask (zero-filled) and emits a wide masked load.
// NOTE(review): the second argument of getElementCount (around 6855) is
// missing from this listing — presumably the original mask/data element
// count. Verify against the original source.
6832SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
6833 EVT VT = N->getValueType(0);
6834 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
6835 SDValue Mask = N->getMask();
6836 EVT MaskVT = Mask.getValueType();
6837 SDValue PassThru = GetWidenedVector(N->getPassThru());
6838 ISD::LoadExtType ExtType = N->getExtensionType();
6839 SDLoc dl(N);
6840
6841 EVT WideMaskVT =
6842 EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(),
6843 WidenVT.getVectorElementCount());
6844
6845 if (ExtType == ISD::NON_EXTLOAD && !N->isExpandingLoad() &&
6846 TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WidenVT) &&
6847 TLI.isTypeLegal(WideMaskVT) &&
6848 // If there is a passthru, we shouldn't use vp.load. However,
6849 // type legalizer will struggle on masked.load with
6850 // scalable vectors, so for scalable vectors, we still use vp.load
6851 // but manually merge the load result with the passthru using vp.select.
6852 (N->getPassThru()->isUndef() || VT.isScalableVector())) {
 // Pad the mask with poison lanes; the EVL operand keeps them inactive.
6853 Mask = DAG.getInsertSubvector(dl, DAG.getPOISON(WideMaskVT), Mask, 0);
6854 SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
6856 SDValue NewLoad =
6857 DAG.getLoadVP(N->getAddressingMode(), ISD::NON_EXTLOAD, WidenVT, dl,
6858 N->getChain(), N->getBasePtr(), N->getOffset(), Mask, EVL,
6859 N->getMemoryVT(), N->getMemOperand());
6860 SDValue NewVal = NewLoad;
6861
6862 // Manually merge with vselect
6863 if (!N->getPassThru()->isUndef()) {
6864 assert(WidenVT.isScalableVector());
6865 NewVal = DAG.getNode(ISD::VSELECT, dl, WidenVT, Mask, NewVal, PassThru);
6866 // The lanes past EVL are poison.
6867 NewVal = DAG.getNode(ISD::VP_MERGE, dl, WidenVT,
6868 DAG.getAllOnesConstant(dl, WideMaskVT), NewVal,
6869 DAG.getPOISON(WidenVT), EVL);
6870 }
6871
6872 // Modified the chain - switch anything that used the old chain to use
6873 // the new one.
6874 ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
6875
6876 return NewVal;
6877 }
6878
6879 // The mask should be widened as well
6880 Mask = ModifyToType(Mask, WideMaskVT, true);
6881
6882 SDValue Res = DAG.getMaskedLoad(
6883 WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
6884 PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
6885 ExtType, N->isExpandingLoad());
6886 // Legalize the chain result - switch anything that used the old chain to
6887 // use the new one.
6888 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
6889 return Res;
6890}
6891
6892SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
6893
6894 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6895 SDValue Mask = N->getMask();
6896 EVT MaskVT = Mask.getValueType();
6897 SDValue PassThru = GetWidenedVector(N->getPassThru());
6898 SDValue Scale = N->getScale();
6899 unsigned NumElts = WideVT.getVectorNumElements();
6900 SDLoc dl(N);
6901
6902 // The mask should be widened as well
6903 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
6904 MaskVT.getVectorElementType(),
6905 WideVT.getVectorNumElements());
6906 Mask = ModifyToType(Mask, WideMaskVT, true);
6907
6908 // Widen the Index operand
6909 SDValue Index = N->getIndex();
6910 EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
6911 Index.getValueType().getScalarType(),
6912 NumElts);
6913 Index = ModifyToType(Index, WideIndexVT);
6914 SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
6915 Scale };
6916
6917 // Widen the MemoryType
6918 EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(),
6919 N->getMemoryVT().getScalarType(), NumElts);
6920 SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
6921 WideMemVT, dl, Ops, N->getMemOperand(),
6922 N->getIndexType(), N->getExtensionType());
6923
6924 // Legalize the chain result - switch anything that used the old chain to
6925 // use the new one.
6926 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
6927 return Res;
6928}
6929
6930SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) {
6931 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6932 SDValue Mask = N->getMask();
6933 SDValue Scale = N->getScale();
6934 ElementCount WideEC = WideVT.getVectorElementCount();
6935 SDLoc dl(N);
6936
6937 SDValue Index = GetWidenedVector(N->getIndex());
6938 EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(),
6939 N->getMemoryVT().getScalarType(), WideEC);
6940 Mask = GetWidenedMask(Mask, WideEC);
6941
6942 SDValue Ops[] = {N->getChain(), N->getBasePtr(), Index, Scale,
6943 Mask, N->getVectorLength()};
6944 SDValue Res = DAG.getGatherVP(DAG.getVTList(WideVT, MVT::Other), WideMemVT,
6945 dl, Ops, N->getMemOperand(), N->getIndexType());
6946
6947 // Legalize the chain result - switch anything that used the old chain to
6948 // use the new one.
6949 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
6950 return Res;
6951}
6952
6953SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) {
6954 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6955 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0));
6956}
6957
6958// Return true is this is a SETCC node or a strict version of it.
6959static inline bool isSETCCOp(unsigned Opcode) {
6960 switch (Opcode) {
6961 case ISD::SETCC:
6962 case ISD::STRICT_FSETCC:
6964 return true;
6965 }
6966 return false;
6967}
6968
6969// Return true if this is a node that could have two SETCCs as operands.
6970static inline bool isLogicalMaskOp(unsigned Opcode) {
6971 switch (Opcode) {
6972 case ISD::AND:
6973 case ISD::OR:
6974 case ISD::XOR:
6975 return true;
6976 }
6977 return false;
6978}
6979
6980// If N is a SETCC or a strict variant of it, return the type
6981// of the compare operands.
6983 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
6984 return N->getOperand(OpNo).getValueType();
6985}
6986
6987// This is used just for the assert in convertMask(). Check that this either
6988// a SETCC or a previously handled SETCC by convertMask().
6989#ifndef NDEBUG
6990static inline bool isSETCCorConvertedSETCC(SDValue N) {
6991 if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
6992 N = N.getOperand(0);
6993 else if (N.getOpcode() == ISD::CONCAT_VECTORS) {
6994 for (unsigned i = 1; i < N->getNumOperands(); ++i)
6995 if (!N->getOperand(i)->isUndef())
6996 return false;
6997 N = N.getOperand(0);
6998 }
6999
7000 if (N.getOpcode() == ISD::TRUNCATE)
7001 N = N.getOperand(0);
7002 else if (N.getOpcode() == ISD::SIGN_EXTEND)
7003 N = N.getOperand(0);
7004
7005 if (isLogicalMaskOp(N.getOpcode()))
7006 return isSETCCorConvertedSETCC(N.getOperand(0)) &&
7007 isSETCCorConvertedSETCC(N.getOperand(1));
7008
7009 return (isSETCCOp(N.getOpcode()) ||
7011}
7012#endif
7013
7014// Return a mask of vector type MaskVT to replace InMask. Also adjust MaskVT
7015// to ToMaskVT if needed with vector extension or truncation.
7016SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
7017 EVT ToMaskVT) {
7018 // Currently a SETCC or a AND/OR/XOR with two SETCCs are handled.
7019 // FIXME: This code seems to be too restrictive, we might consider
7020 // generalizing it or dropping it.
7021 assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
7022
7023 // Make a new Mask node, with a legal result VT.
7024 SDValue Mask;
7026 for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
7027 Ops.push_back(InMask->getOperand(i));
7028 if (InMask->isStrictFPOpcode()) {
7029 Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask),
7030 { MaskVT, MVT::Other }, Ops);
7031 ReplaceValueWith(InMask.getValue(1), Mask.getValue(1));
7032 }
7033 else
7034 Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops,
7035 InMask->getFlags());
7036
7037 // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
7038 // extend or truncate is needed.
7039 LLVMContext &Ctx = *DAG.getContext();
7040 unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
7041 unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
7042 if (MaskScalarBits < ToMaskScalBits) {
7043 EVT ExtVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
7044 MaskVT.getVectorNumElements());
7045 Mask = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Mask), ExtVT, Mask);
7046 } else if (MaskScalarBits > ToMaskScalBits) {
7047 EVT TruncVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
7048 MaskVT.getVectorNumElements());
7049 Mask = DAG.getNode(ISD::TRUNCATE, SDLoc(Mask), TruncVT, Mask);
7050 }
7051
7052 assert(Mask->getValueType(0).getScalarSizeInBits() ==
7053 ToMaskVT.getScalarSizeInBits() &&
7054 "Mask should have the right element size by now.");
7055
7056 // Adjust Mask to the right number of elements.
7057 unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
7058 if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
7059 Mask = DAG.getExtractSubvector(SDLoc(Mask), ToMaskVT, Mask, 0);
7060 } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
7061 unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
7062 EVT SubVT = Mask->getValueType(0);
7063 SmallVector<SDValue, 16> SubOps(NumSubVecs, DAG.getPOISON(SubVT));
7064 SubOps[0] = Mask;
7065 Mask = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubOps);
7066 }
7067
7068 assert((Mask->getValueType(0) == ToMaskVT) &&
7069 "A mask of ToMaskVT should have been produced by now.");
7070
7071 return Mask;
7072}
7073
// This method tries to handle some special cases for the vselect mask
// and if needed adjusting the mask vector type to match that of the VSELECT.
// Without it, many cases end up with scalarization of the SETCC, with many
// unnecessary instructions.
//
// Returns the converted mask on success, or a null SDValue to tell the
// caller to fall back to the generic widening path.
SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
  LLVMContext &Ctx = *DAG.getContext();
  SDValue Cond = N->getOperand(0);

  // Only plain VSELECT nodes are handled here.
  if (N->getOpcode() != ISD::VSELECT)
    return SDValue();

  // The condition must be a SETCC (possibly strict) or a logical op that can
  // combine two SETCCs.
  if (!isSETCCOp(Cond->getOpcode()) && !isLogicalMaskOp(Cond->getOpcode()))
    return SDValue();

  // If this is a splitted VSELECT that was previously already handled, do
  // nothing.
  EVT CondVT = Cond->getValueType(0);
  if (CondVT.getScalarSizeInBits() != 1)
    return SDValue();

  EVT VSelVT = N->getValueType(0);

  // This method can't handle scalable vector types.
  // FIXME: This support could be added in the future.
  if (VSelVT.isScalableVector())
    return SDValue();

  // Only handle vector types which are a power of 2.
  if (!isPowerOf2_64(VSelVT.getSizeInBits()))
    return SDValue();

  // Don't touch if this will be scalarized.
  EVT FinalVT = VSelVT;
  while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
    FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx);

  if (FinalVT.getVectorNumElements() == 1)
    return SDValue();

  // If there is support for an i1 vector mask, don't touch.
  if (isSETCCOp(Cond.getOpcode())) {
    // Walk the SETCC operand type to its legal form and check what mask type
    // the target would naturally produce for it.
    EVT SetCCOpVT = getSETCCOperandType(Cond);
    while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
      SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
    EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
    if (SetCCResVT.getScalarSizeInBits() == 1)
      return SDValue();
  } else if (CondVT.getScalarType() == MVT::i1) {
    // If there is support for an i1 vector mask (or only scalar i1 conditions),
    // don't touch.
    while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal)
      CondVT = TLI.getTypeToTransformTo(Ctx, CondVT);

    if (CondVT.getScalarType() == MVT::i1)
      return SDValue();
  }

  // Widen the vselect result type if needed.
  if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector)
    VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);

  // The mask of the VSELECT should have integer elements.
  EVT ToMaskVT = VSelVT;
  if (!ToMaskVT.getScalarType().isInteger())
    ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger();

  SDValue Mask;
  if (isSETCCOp(Cond->getOpcode())) {
    // Single compare: convert it directly to a mask of ToMaskVT.
    EVT MaskVT = getSetCCResultType(getSETCCOperandType(Cond));
    Mask = convertMask(Cond, MaskVT, ToMaskVT);
  } else if (isLogicalMaskOp(Cond->getOpcode()) &&
             isSETCCOp(Cond->getOperand(0).getOpcode()) &&
             isSETCCOp(Cond->getOperand(1).getOpcode())) {
    // Cond is (AND/OR/XOR (SETCC, SETCC))
    SDValue SETCC0 = Cond->getOperand(0);
    SDValue SETCC1 = Cond->getOperand(1);
    EVT VT0 = getSetCCResultType(getSETCCOperandType(SETCC0));
    EVT VT1 = getSetCCResultType(getSETCCOperandType(SETCC1));
    unsigned ScalarBits0 = VT0.getScalarSizeInBits();
    unsigned ScalarBits1 = VT1.getScalarSizeInBits();
    unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
    EVT MaskVT;
    // If the two SETCCs have different VTs, either extend/truncate one of
    // them to the other "towards" ToMaskVT, or truncate one and extend the
    // other to ToMaskVT.
    if (ScalarBits0 != ScalarBits1) {
      EVT NarrowVT = ((ScalarBits0 < ScalarBits1) ? VT0 : VT1);
      EVT WideVT = ((NarrowVT == VT0) ? VT1 : VT0);
      if (ScalarBits_ToMask >= WideVT.getScalarSizeInBits())
        MaskVT = WideVT;
      else if (ScalarBits_ToMask <= NarrowVT.getScalarSizeInBits())
        MaskVT = NarrowVT;
      else
        MaskVT = ToMaskVT;
    } else
      // If the two SETCCs have the same VT, don't change it.
      MaskVT = VT0;

    // Make new SETCCs and logical nodes.
    SETCC0 = convertMask(SETCC0, VT0, MaskVT);
    SETCC1 = convertMask(SETCC1, VT1, MaskVT);
    Cond = DAG.getNode(Cond->getOpcode(), SDLoc(Cond), MaskVT, SETCC0, SETCC1);

    // Convert the logical op for VSELECT if needed.
    Mask = convertMask(Cond, MaskVT, ToMaskVT);
  } else
    return SDValue();

  return Mask;
}
7184
7185SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) {
7186 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
7187 ElementCount WidenEC = WidenVT.getVectorElementCount();
7188
7189 SDValue Cond1 = N->getOperand(0);
7190 EVT CondVT = Cond1.getValueType();
7191 unsigned Opcode = N->getOpcode();
7192 if (CondVT.isVector()) {
7193 if (SDValue WideCond = WidenVSELECTMask(N)) {
7194 SDValue InOp1 = GetWidenedVector(N->getOperand(1));
7195 SDValue InOp2 = GetWidenedVector(N->getOperand(2));
7196 assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
7197 return DAG.getNode(Opcode, SDLoc(N), WidenVT, WideCond, InOp1, InOp2);
7198 }
7199
7200 EVT CondEltVT = CondVT.getVectorElementType();
7201 EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenEC);
7202 if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
7203 Cond1 = GetWidenedVector(Cond1);
7204
7205 // If we have to split the condition there is no point in widening the
7206 // select. This would result in an cycle of widening the select ->
7207 // widening the condition operand -> splitting the condition operand ->
7208 // splitting the select -> widening the select. Instead split this select
7209 // further and widen the resulting type.
7210 if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) {
7211 SDValue SplitSelect = SplitVecOp_VSELECT(N, 0);
7212 SDValue Res = ModifyToType(SplitSelect, WidenVT);
7213 return Res;
7214 }
7215
7216 if (Cond1.getValueType() != CondWidenVT)
7217 Cond1 = ModifyToType(Cond1, CondWidenVT);
7218 }
7219
7220 SDValue InOp1 = GetWidenedVector(N->getOperand(1));
7221 SDValue InOp2 = GetWidenedVector(N->getOperand(2));
7222 assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
7223 if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE)
7224 return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
7225 N->getOperand(3));
7226 return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
7227}
7228
7229SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
7230 SDValue InOp1 = GetWidenedVector(N->getOperand(2));
7231 SDValue InOp2 = GetWidenedVector(N->getOperand(3));
7232 return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
7233 InOp1.getValueType(), N->getOperand(0),
7234 N->getOperand(1), InOp1, InOp2, N->getOperand(4));
7235}
7236
7237SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
7238 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
7239 return DAG.getUNDEF(WidenVT);
7240}
7241
7242SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
7243 EVT VT = N->getValueType(0);
7244 SDLoc dl(N);
7245
7246 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7247 unsigned NumElts = VT.getVectorNumElements();
7248 unsigned WidenNumElts = WidenVT.getVectorNumElements();
7249
7250 SDValue InOp1 = GetWidenedVector(N->getOperand(0));
7251 SDValue InOp2 = GetWidenedVector(N->getOperand(1));
7252
7253 // Adjust mask based on new input vector length.
7254 SmallVector<int, 16> NewMask(WidenNumElts, -1);
7255 for (unsigned i = 0; i != NumElts; ++i) {
7256 int Idx = N->getMaskElt(i);
7257 if (Idx < (int)NumElts)
7258 NewMask[i] = Idx;
7259 else
7260 NewMask[i] = Idx - NumElts + WidenNumElts;
7261 }
7262 return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
7263}
7264
7265SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) {
7266 EVT VT = N->getValueType(0);
7267 EVT EltVT = VT.getVectorElementType();
7268 SDLoc dl(N);
7269
7270 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7271 SDValue OpValue = GetWidenedVector(N->getOperand(0));
7272 assert(WidenVT == OpValue.getValueType() && "Unexpected widened vector type");
7273
7274 SDValue ReverseVal = DAG.getNode(ISD::VECTOR_REVERSE, dl, WidenVT, OpValue);
7275 unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
7276 unsigned VTNumElts = VT.getVectorMinNumElements();
7277 unsigned IdxVal = WidenNumElts - VTNumElts;
7278
7279 if (VT.isScalableVector()) {
7280 // Try to split the 'Widen ReverseVal' into smaller extracts and concat the
7281 // results together, e.g.(nxv6i64 -> nxv8i64)
7282 // nxv8i64 vector_reverse
7283 // <->
7284 // nxv8i64 concat(
7285 // nxv2i64 extract_subvector(nxv8i64, 2)
7286 // nxv2i64 extract_subvector(nxv8i64, 4)
7287 // nxv2i64 extract_subvector(nxv8i64, 6)
7288 // nxv2i64 undef)
7289
7290 unsigned GCD = std::gcd(VTNumElts, WidenNumElts);
7291 EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
7293 assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
7294 "down type's element count");
7296 unsigned i = 0;
7297 for (; i < VTNumElts / GCD; ++i)
7298 Parts.push_back(
7299 DAG.getExtractSubvector(dl, PartVT, ReverseVal, IdxVal + i * GCD));
7300 for (; i < WidenNumElts / GCD; ++i)
7301 Parts.push_back(DAG.getPOISON(PartVT));
7302
7303 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
7304 }
7305
7306 // Use VECTOR_SHUFFLE to combine new vector from 'ReverseVal' for
7307 // fixed-vectors.
7308 SmallVector<int, 16> Mask(WidenNumElts, -1);
7309 std::iota(Mask.begin(), Mask.begin() + VTNumElts, IdxVal);
7310
7311 return DAG.getVectorShuffle(WidenVT, dl, ReverseVal, DAG.getPOISON(WidenVT),
7312 Mask);
7313}
7314
7315SDValue DAGTypeLegalizer::WidenVecRes_GET_ACTIVE_LANE_MASK(SDNode *N) {
7316 EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
7317 return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, SDLoc(N), NVT, N->ops());
7318}
7319
7320SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
7321 assert(N->getValueType(0).isVector() &&
7322 N->getOperand(0).getValueType().isVector() &&
7323 "Operands must be vectors");
7324 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
7325 ElementCount WidenEC = WidenVT.getVectorElementCount();
7326
7327 SDValue InOp1 = N->getOperand(0);
7328 EVT InVT = InOp1.getValueType();
7329 assert(InVT.isVector() && "can not widen non-vector type");
7330 EVT WidenInVT =
7331 EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenEC);
7332
7333 // The input and output types often differ here, and it could be that while
7334 // we'd prefer to widen the result type, the input operands have been split.
7335 // In this case, we also need to split the result of this node as well.
7336 if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
7337 SDValue SplitVSetCC = SplitVecOp_VSETCC(N);
7338 SDValue Res = ModifyToType(SplitVSetCC, WidenVT);
7339 return Res;
7340 }
7341
7342 // If the inputs also widen, handle them directly. Otherwise widen by hand.
7343 SDValue InOp2 = N->getOperand(1);
7344 if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
7345 InOp1 = GetWidenedVector(InOp1);
7346 InOp2 = GetWidenedVector(InOp2);
7347 } else {
7348 SDValue Poison = DAG.getPOISON(WidenInVT);
7349 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
7350 InOp1 = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), WidenInVT, Poison,
7351 InOp1, ZeroIdx);
7352 InOp2 = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), WidenInVT, Poison,
7353 InOp2, ZeroIdx);
7354 }
7355
7356 // Assume that the input and output will be widen appropriately. If not,
7357 // we will have to unroll it at some point.
7358 assert(InOp1.getValueType() == WidenInVT &&
7359 InOp2.getValueType() == WidenInVT &&
7360 "Input not widened to expected type!");
7361 (void)WidenInVT;
7362 if (N->getOpcode() == ISD::VP_SETCC) {
7363 SDValue Mask =
7364 GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount());
7365 return DAG.getNode(ISD::VP_SETCC, SDLoc(N), WidenVT, InOp1, InOp2,
7366 N->getOperand(2), Mask, N->getOperand(4));
7367 }
7368 return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2,
7369 N->getOperand(2));
7370}
7371
7372SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
7373 assert(N->getValueType(0).isVector() &&
7374 N->getOperand(1).getValueType().isVector() &&
7375 "Operands must be vectors");
7376 EVT VT = N->getValueType(0);
7377 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7378 unsigned WidenNumElts = WidenVT.getVectorNumElements();
7379 unsigned NumElts = VT.getVectorNumElements();
7380 EVT EltVT = VT.getVectorElementType();
7381
7382 SDLoc dl(N);
7383 SDValue Chain = N->getOperand(0);
7384 SDValue LHS = N->getOperand(1);
7385 SDValue RHS = N->getOperand(2);
7386 SDValue CC = N->getOperand(3);
7387 EVT TmpEltVT = LHS.getValueType().getVectorElementType();
7388
7389 // Fully unroll and reassemble.
7390 SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getPOISON(EltVT));
7391 SmallVector<SDValue, 8> Chains(NumElts);
7392 for (unsigned i = 0; i != NumElts; ++i) {
7393 SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i);
7394 SDValue RHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, RHS, i);
7395
7396 Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
7397 {Chain, LHSElem, RHSElem, CC});
7398 Chains[i] = Scalars[i].getValue(1);
7399 Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i],
7400 DAG.getBoolConstant(true, dl, EltVT, VT),
7401 DAG.getBoolConstant(false, dl, EltVT, VT));
7402 }
7403
7404 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
7405 ReplaceValueWith(SDValue(N, 1), NewChain);
7406
7407 return DAG.getBuildVector(WidenVT, dl, Scalars);
7408}
7409
7410//===----------------------------------------------------------------------===//
7411// Widen Vector Operand
7412//===----------------------------------------------------------------------===//
7413bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
7414 LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG));
7415 SDValue Res = SDValue();
7416
7417 // See if the target wants to custom widen this node.
7418 if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
7419 return false;
7420
7421 switch (N->getOpcode()) {
7422 default:
7423#ifndef NDEBUG
7424 dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
7425 N->dump(&DAG);
7426 dbgs() << "\n";
7427#endif
7428 report_fatal_error("Do not know how to widen this operator's operand!");
7429
7430 case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
7431 case ISD::FAKE_USE:
7432 Res = WidenVecOp_FAKE_USE(N);
7433 break;
7434 case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
7435 case ISD::INSERT_SUBVECTOR: Res = WidenVecOp_INSERT_SUBVECTOR(N); break;
7436 case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
7437 case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
7438 case ISD::STORE: Res = WidenVecOp_STORE(N); break;
7439 case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break;
7440 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7441 Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo);
7442 break;
7446 Res = WidenVecOp_EXTEND_VECTOR_INREG(N);
7447 break;
7448 case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
7449 case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
7450 case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
7451 case ISD::VP_SCATTER: Res = WidenVecOp_VP_SCATTER(N, OpNo); break;
7452 case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
7453 case ISD::STRICT_FSETCC:
7454 case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
7455 case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
7456 case ISD::FLDEXP:
7457 case ISD::FCOPYSIGN:
7458 case ISD::LROUND:
7459 case ISD::LLROUND:
7460 case ISD::LRINT:
7461 case ISD::LLRINT:
7462 Res = WidenVecOp_UnrollVectorOp(N);
7463 break;
7464 case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break;
7465
7466 case ISD::ANY_EXTEND:
7467 case ISD::SIGN_EXTEND:
7468 case ISD::ZERO_EXTEND:
7469 Res = WidenVecOp_EXTEND(N);
7470 break;
7471
7472 case ISD::SCMP:
7473 case ISD::UCMP:
7474 Res = WidenVecOp_CMP(N);
7475 break;
7476
7477 case ISD::FP_EXTEND:
7479 case ISD::FP_ROUND:
7481 case ISD::FP_TO_SINT:
7483 case ISD::FP_TO_UINT:
7485 case ISD::SINT_TO_FP:
7487 case ISD::UINT_TO_FP:
7489 case ISD::TRUNCATE:
7491 Res = WidenVecOp_Convert(N);
7492 break;
7493
7496 Res = WidenVecOp_FP_TO_XINT_SAT(N);
7497 break;
7498
7501 case ISD::VECREDUCE_ADD:
7502 case ISD::VECREDUCE_MUL:
7503 case ISD::VECREDUCE_AND:
7504 case ISD::VECREDUCE_OR:
7505 case ISD::VECREDUCE_XOR:
7514 Res = WidenVecOp_VECREDUCE(N);
7515 break;
7518 Res = WidenVecOp_VECREDUCE_SEQ(N);
7519 break;
7520 case ISD::VP_REDUCE_FADD:
7521 case ISD::VP_REDUCE_SEQ_FADD:
7522 case ISD::VP_REDUCE_FMUL:
7523 case ISD::VP_REDUCE_SEQ_FMUL:
7524 case ISD::VP_REDUCE_ADD:
7525 case ISD::VP_REDUCE_MUL:
7526 case ISD::VP_REDUCE_AND:
7527 case ISD::VP_REDUCE_OR:
7528 case ISD::VP_REDUCE_XOR:
7529 case ISD::VP_REDUCE_SMAX:
7530 case ISD::VP_REDUCE_SMIN:
7531 case ISD::VP_REDUCE_UMAX:
7532 case ISD::VP_REDUCE_UMIN:
7533 case ISD::VP_REDUCE_FMAX:
7534 case ISD::VP_REDUCE_FMIN:
7535 case ISD::VP_REDUCE_FMAXIMUM:
7536 case ISD::VP_REDUCE_FMINIMUM:
7537 Res = WidenVecOp_VP_REDUCE(N);
7538 break;
7539 case ISD::VP_CTTZ_ELTS:
7540 case ISD::VP_CTTZ_ELTS_ZERO_POISON:
7541 Res = WidenVecOp_VP_CttzElements(N);
7542 break;
7544 Res = WidenVecOp_VECTOR_FIND_LAST_ACTIVE(N);
7545 break;
7546 }
7547
7548 // If Res is null, the sub-method took care of registering the result.
7549 if (!Res.getNode()) return false;
7550
7551 // If the result is N, the sub-method updated N in place. Tell the legalizer
7552 // core about this.
7553 if (Res.getNode() == N)
7554 return true;
7555
7556
7557 if (N->isStrictFPOpcode())
7558 assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
7559 "Invalid operand expansion");
7560 else
7561 assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
7562 "Invalid operand expansion");
7563
7564 ReplaceValueWith(SDValue(N, 0), Res);
7565 return false;
7566}
7567
7568SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
7569 SDLoc DL(N);
7570 EVT VT = N->getValueType(0);
7571
7572 SDValue InOp = N->getOperand(0);
7573 assert(getTypeAction(InOp.getValueType()) ==
7575 "Unexpected type action");
7576 InOp = GetWidenedVector(InOp);
7579 "Input wasn't widened!");
7580
7581 // We may need to further widen the operand until it has the same total
7582 // vector size as the result.
7583 EVT InVT = InOp.getValueType();
7584 if (InVT.getSizeInBits() != VT.getSizeInBits()) {
7585 EVT InEltVT = InVT.getVectorElementType();
7586 for (EVT FixedVT : MVT::vector_valuetypes()) {
7587 EVT FixedEltVT = FixedVT.getVectorElementType();
7588 if (TLI.isTypeLegal(FixedVT) &&
7589 FixedVT.getSizeInBits() == VT.getSizeInBits() &&
7590 FixedEltVT == InEltVT) {
7591 assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
7592 "Not enough elements in the fixed type for the operand!");
7593 assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
7594 "We can't have the same type as we started with!");
7595 if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
7596 InOp = DAG.getInsertSubvector(DL, DAG.getPOISON(FixedVT), InOp, 0);
7597 else
7598 InOp = DAG.getExtractSubvector(DL, FixedVT, InOp, 0);
7599 break;
7600 }
7601 }
7602 InVT = InOp.getValueType();
7603 if (InVT.getSizeInBits() != VT.getSizeInBits())
7604 // We couldn't find a legal vector type that was a widening of the input
7605 // and could be extended in-register to the result type, so we have to
7606 // scalarize.
7607 return WidenVecOp_Convert(N);
7608 }
7609
7610 // Use special DAG nodes to represent the operation of extending the
7611 // low lanes.
7612 switch (N->getOpcode()) {
7613 default:
7614 llvm_unreachable("Extend legalization on extend operation!");
7615 case ISD::ANY_EXTEND:
7616 return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, InOp);
7617 case ISD::SIGN_EXTEND:
7618 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, InOp);
7619 case ISD::ZERO_EXTEND:
7620 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, InOp);
7621 }
7622}
7623
7624SDValue DAGTypeLegalizer::WidenVecOp_CMP(SDNode *N) {
7625 SDLoc dl(N);
7626
7627 EVT OpVT = N->getOperand(0).getValueType();
7628 EVT ResVT = N->getValueType(0);
7629 SDValue LHS = GetWidenedVector(N->getOperand(0));
7630 SDValue RHS = GetWidenedVector(N->getOperand(1));
7631
7632 // 1. EXTRACT_SUBVECTOR
7633 // 2. SIGN_EXTEND/ZERO_EXTEND
7634 // 3. CMP
7635 LHS = DAG.getExtractSubvector(dl, OpVT, LHS, 0);
7636 RHS = DAG.getExtractSubvector(dl, OpVT, RHS, 0);
7637
7638 // At this point the result type is guaranteed to be valid, so we can use it
7639 // as the operand type by extending it appropriately
7640 ISD::NodeType ExtendOpcode =
7641 N->getOpcode() == ISD::SCMP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7642 LHS = DAG.getNode(ExtendOpcode, dl, ResVT, LHS);
7643 RHS = DAG.getNode(ExtendOpcode, dl, ResVT, RHS);
7644
7645 return DAG.getNode(N->getOpcode(), dl, ResVT, LHS, RHS);
7646}
7647
// Fallback for nodes with a legal result but a vector operand that only
// appears as a secondary input: scalarize the whole operation.
SDValue DAGTypeLegalizer::WidenVecOp_UnrollVectorOp(SDNode *N) {
  // The result (and first input) is legal, but the second input is illegal.
  // We can't do much to fix that, so just unroll and let the extracts off of
  // the second input be widened as needed later.
  return DAG.UnrollVectorOp(N);
}
7654
7655SDValue DAGTypeLegalizer::WidenVecOp_IS_FPCLASS(SDNode *N) {
7656 SDLoc DL(N);
7657 EVT ResultVT = N->getValueType(0);
7658 SDValue Test = N->getOperand(1);
7659 SDValue WideArg = GetWidenedVector(N->getOperand(0));
7660
7661 // Process this node similarly to SETCC.
7662 EVT WideResultVT = getSetCCResultType(WideArg.getValueType());
7663 if (ResultVT.getScalarType() == MVT::i1)
7664 WideResultVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
7665 WideResultVT.getVectorNumElements());
7666
7667 SDValue WideNode = DAG.getNode(ISD::IS_FPCLASS, DL, WideResultVT,
7668 {WideArg, Test}, N->getFlags());
7669
7670 // Extract the needed results from the result vector.
7671 EVT ResVT =
7672 EVT::getVectorVT(*DAG.getContext(), WideResultVT.getVectorElementType(),
7673 ResultVT.getVectorNumElements());
7674 SDValue CC = DAG.getExtractSubvector(DL, ResVT, WideNode, 0);
7675
7676 EVT OpVT = N->getOperand(0).getValueType();
7677 ISD::NodeType ExtendCode =
7678 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
7679 return DAG.getNode(ExtendCode, DL, ResultVT, CC);
7680}
7681
7682SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
7683 // Since the result is legal and the input is illegal.
7684 EVT VT = N->getValueType(0);
7685 EVT EltVT = VT.getVectorElementType();
7686 SDLoc dl(N);
7687 SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
7688 assert(getTypeAction(InOp.getValueType()) ==
7690 "Unexpected type action");
7691 InOp = GetWidenedVector(InOp);
7692 EVT InVT = InOp.getValueType();
7693 unsigned Opcode = N->getOpcode();
7694
7695 // See if a widened result type would be legal, if so widen the node.
7696 // FIXME: This isn't safe for StrictFP. Other optimization here is needed.
7697 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
7698 InVT.getVectorElementCount());
7699 if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
7700 SDValue Res;
7701 if (N->isStrictFPOpcode()) {
7702 if (Opcode == ISD::STRICT_FP_ROUND)
7703 Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
7704 { N->getOperand(0), InOp, N->getOperand(2) });
7705 else
7706 Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
7707 { N->getOperand(0), InOp });
7708 // Legalize the chain result - switch anything that used the old chain to
7709 // use the new one.
7710 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
7711 } else {
7712 if (Opcode == ISD::FP_ROUND || Opcode == ISD::CONVERT_FROM_ARBITRARY_FP)
7713 Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1));
7714 else
7715 Res = DAG.getNode(Opcode, dl, WideVT, InOp);
7716 }
7717 return DAG.getExtractSubvector(dl, VT, Res, 0);
7718 }
7719
7720 EVT InEltVT = InVT.getVectorElementType();
7721
7722 // Unroll the convert into some scalar code and create a nasty build vector.
7723 unsigned NumElts = VT.getVectorNumElements();
7725 if (N->isStrictFPOpcode()) {
7726 SmallVector<SDValue, 4> NewOps(N->ops());
7727 SmallVector<SDValue, 32> OpChains;
7728 for (unsigned i=0; i < NumElts; ++i) {
7729 NewOps[1] = DAG.getExtractVectorElt(dl, InEltVT, InOp, i);
7730 Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps);
7731 OpChains.push_back(Ops[i].getValue(1));
7732 }
7733 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
7734 ReplaceValueWith(SDValue(N, 1), NewChain);
7735 } else {
7736 for (unsigned i = 0; i < NumElts; ++i) {
7737 SDValue Elt = DAG.getExtractVectorElt(dl, InEltVT, InOp, i);
7738 if (Opcode == ISD::FP_ROUND || Opcode == ISD::CONVERT_FROM_ARBITRARY_FP)
7739 Ops[i] = DAG.getNode(Opcode, dl, EltVT, Elt, N->getOperand(1));
7740 else
7741 Ops[i] = DAG.getNode(Opcode, dl, EltVT, Elt);
7742 }
7743 }
7744
7745 return DAG.getBuildVector(VT, dl, Ops);
7746}
7747
7748SDValue DAGTypeLegalizer::WidenVecOp_FP_TO_XINT_SAT(SDNode *N) {
7749 EVT DstVT = N->getValueType(0);
7750 SDValue Src = GetWidenedVector(N->getOperand(0));
7751 EVT SrcVT = Src.getValueType();
7752 ElementCount WideNumElts = SrcVT.getVectorElementCount();
7753 SDLoc dl(N);
7754
7755 // See if a widened result type would be legal, if so widen the node.
7756 EVT WideDstVT = EVT::getVectorVT(*DAG.getContext(),
7757 DstVT.getVectorElementType(), WideNumElts);
7758 if (TLI.isTypeLegal(WideDstVT)) {
7759 SDValue Res =
7760 DAG.getNode(N->getOpcode(), dl, WideDstVT, Src, N->getOperand(1));
7761 return DAG.getNode(
7762 ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
7763 DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
7764 }
7765
7766 // Give up and unroll.
7767 return DAG.UnrollVectorOp(N);
7768}
7769
7770SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
7771 EVT VT = N->getValueType(0);
7772 SDValue InOp = GetWidenedVector(N->getOperand(0));
7773 EVT InWidenVT = InOp.getValueType();
7774 SDLoc dl(N);
7775
7776 // Check if we can convert between two legal vector types and extract.
7777 TypeSize InWidenSize = InWidenVT.getSizeInBits();
7778 TypeSize Size = VT.getSizeInBits();
7779 // x86mmx is not an acceptable vector element type, so don't try.
7780 if (!VT.isVector() && VT != MVT::x86mmx &&
7781 InWidenSize.hasKnownScalarFactor(Size)) {
7782 unsigned NewNumElts = InWidenSize.getKnownScalarFactor(Size);
7783 EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
7784 if (TLI.isTypeLegal(NewVT)) {
7785 SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
7786 return DAG.getExtractVectorElt(dl, VT, BitOp, 0);
7787 }
7788 }
7789
7790 // Handle a case like bitcast v12i8 -> v3i32. Normally that would get widened
7791 // to v16i8 -> v4i32, but for a target where v3i32 is legal but v12i8 is not,
7792 // we end up here. Handling the case here with EXTRACT_SUBVECTOR avoids
7793 // having to copy via memory.
7794 if (VT.isVector()) {
7795 EVT EltVT = VT.getVectorElementType();
7796 unsigned EltSize = EltVT.getFixedSizeInBits();
7797 if (InWidenSize.isKnownMultipleOf(EltSize)) {
7798 ElementCount NewNumElts =
7799 (InWidenVT.getVectorElementCount() * InWidenVT.getScalarSizeInBits())
7800 .divideCoefficientBy(EltSize);
7801 EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts);
7802 if (TLI.isTypeLegal(NewVT)) {
7803 SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
7804 return DAG.getExtractSubvector(dl, VT, BitOp, 0);
7805 }
7806 }
7807 }
7808
7809 return CreateStackStoreLoad(InOp, VT);
7810}
7811
7812// Vectors with sizes that are not powers of 2 need to be widened to the
7813// next largest power of 2. For example, we may get a vector of 3 32-bit
7814// integers or of 6 16-bit integers, both of which have to be widened to a
7815// 128-bit vector.
7816SDValue DAGTypeLegalizer::WidenVecOp_FAKE_USE(SDNode *N) {
7817 SDValue WidenedOp = GetWidenedVector(N->getOperand(1));
7818 return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0),
7819 WidenedOp);
7820}
7821
// Handle CONCAT_VECTORS whose result type is legal but whose operands have
// been widened. Either reuse a single widened operand directly or rebuild
// the result element-by-element.
7822SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
7823 EVT VT = N->getValueType(0);
7824 EVT EltVT = VT.getVectorElementType();
7825 EVT InVT = N->getOperand(0).getValueType();
7826 SDLoc dl(N);
7827
7828 // If the widen width for this operand is the same as the width of the concat
7829 // and all but the first operand are undef, just use the widened operand.
7830 unsigned NumOperands = N->getNumOperands();
7831 if (VT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
7832 unsigned i;
7833 for (i = 1; i < NumOperands; ++i)
7834 if (!N->getOperand(i).isUndef())
7835 break;
7836
7837 if (i == NumOperands)
7838 return GetWidenedVector(N->getOperand(0));
7839 }
7840
7841 // Otherwise, fall back to a nasty build vector.
7842 unsigned NumElts = VT.getVectorNumElements();
7844
7845 unsigned NumInElts = InVT.getVectorNumElements();
7846
7847 unsigned Idx = 0;
7848 for (unsigned i=0; i < NumOperands; ++i) {
7849 SDValue InOp = N->getOperand(i);
7850 assert(getTypeAction(InOp.getValueType()) ==
7852 "Unexpected type action");
7853 InOp = GetWidenedVector(InOp);
      // Only the leading NumInElts lanes of each widened operand carry real
      // data; the widening padding is deliberately dropped here.
7854 for (unsigned j = 0; j < NumInElts; ++j)
7855 Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j);
7856 }
7857 return DAG.getBuildVector(VT, dl, Ops);
7858}
7859
// Widen the sub-vector operand of an INSERT_SUBVECTOR. Strategies, in order:
// reuse the node when the base vector is undef, merge via VSELECT, spill
// through a stack slot (scalable case), or unroll into INSERT_VECTOR_ELTs.
7860SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
7861 EVT VT = N->getValueType(0);
7862 SDValue SubVec = N->getOperand(1);
7863 SDValue InVec = N->getOperand(0);
7864
7865 EVT OrigVT = SubVec.getValueType();
7866 SubVec = GetWidenedVector(SubVec);
7867 EVT SubVT = SubVec.getValueType();
7868
7869 // Whether or not all the elements of the widened SubVec will be inserted into
7870 // valid indices of VT.
7871 bool IndicesValid = false;
7872 // If we statically know that VT can fit SubVT, the indices are valid.
7873 if (VT.knownBitsGE(SubVT))
7874 IndicesValid = true;
7875 else if (VT.isScalableVector() && SubVT.isFixedLengthVector()) {
7876 // Otherwise, if we're inserting a fixed vector into a scalable vector and
7877 // we know the minimum vscale we can work out if it's valid ourselves.
7878 Attribute Attr = DAG.getMachineFunction().getFunction().getFnAttribute(
7879 Attribute::VScaleRange);
7880 if (Attr.isValid()) {
7881 unsigned VScaleMin = Attr.getVScaleRangeMin();
7882 if (VT.getSizeInBits().getKnownMinValue() * VScaleMin >=
7883 SubVT.getFixedSizeInBits())
7884 IndicesValid = true;
7885 }
7886 }
7887
7888 if (!IndicesValid)
7890 "Don't know how to widen the operands for INSERT_SUBVECTOR");
7891
7892 SDLoc DL(N);
7893
7894 // We need to make sure that the indices are still valid, otherwise we might
7895 // widen what was previously well-defined to something undefined.
7896 if (InVec.isUndef() && N->getConstantOperandVal(2) == 0)
7897 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec,
7898 N->getOperand(2));
7899
7900 if (OrigVT.isScalableVector()) {
7901 // When the widened types match, overwriting the start of a vector is
7902 // effectively a merge operation that can be implemented as a vselect.
7903 if (SubVT == VT && N->getConstantOperandVal(2) == 0) {
7904 SDValue Mask =
7905 DAG.getMaskFromElementCount(DL, VT, OrigVT.getVectorElementCount());
7906 return DAG.getNode(ISD::VSELECT, DL, VT, Mask, SubVec, InVec);
7907 }
7908
7909 // Fallback to inserting through memory.
7910 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
7911 SDValue StackPtr = DAG.CreateStackTemporary(VT.getStoreSize(), Alignment);
7912 MachineFunction &MF = DAG.getMachineFunction();
7913 int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7914 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
7915
7916 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
7919 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
7922
7923 // Write out the vector being inserted into.
7924 SDValue Ch =
7925 DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr, StoreMMO);
7926
7927 // Build a mask to match the length of the sub-vector.
7928 SDValue Mask =
7929 DAG.getMaskFromElementCount(DL, SubVT, OrigVT.getVectorElementCount());
7930
7931 // Overwrite the sub-vector at the required offset. The mask limits the
7931 // masked store to the sub-vector's original (unwidened) element count.
7932 SDValue SubVecPtr =
7933 TLI.getVectorSubVecPointer(DAG, StackPtr, VT, OrigVT, N->getOperand(2));
7934 Ch = DAG.getMaskedStore(Ch, DL, SubVec, SubVecPtr,
7935 DAG.getPOISON(SubVecPtr.getValueType()), Mask, VT,
7936 StoreMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
7937
7938 // Read back the result.
7939 return DAG.getLoad(VT, DL, Ch, StackPtr, LoadMMO);
7940 }
7941
7942 // If the operands can't be widened legally, just replace the INSERT_SUBVECTOR
7943 // with a series of INSERT_VECTOR_ELT
7944 unsigned Idx = N->getConstantOperandVal(2);
7945
7946 SDValue InsertElt = InVec;
7947 for (unsigned I = 0, E = OrigVT.getVectorNumElements(); I != E; ++I) {
7948 SDValue ExtractElt =
7949 DAG.getExtractVectorElt(DL, VT.getVectorElementType(), SubVec, I);
7950 InsertElt = DAG.getInsertVectorElt(DL, InsertElt, ExtractElt, I + Idx);
7951 }
7952
7953 return InsertElt;
7954}
7955
7956SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
7957 SDValue InOp = GetWidenedVector(N->getOperand(0));
7958 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
7959 N->getValueType(0), InOp, N->getOperand(1));
7960}
7961
7962SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
7963 SDValue InOp = GetWidenedVector(N->getOperand(0));
7964 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
7965 N->getValueType(0), InOp, N->getOperand(1));
7966}
7967
7968SDValue DAGTypeLegalizer::WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N) {
7969 SDLoc DL(N);
7970 EVT ResVT = N->getValueType(0);
7971
7972 // Widen the input as requested by the legalizer.
7973 SDValue WideInOp = GetWidenedVector(N->getOperand(0));
7974 EVT WideInVT = WideInOp.getValueType();
7975
7976 // Simple case: if widened input is still smaller than or equal to result,
7977 // just use it directly.
7978 if (WideInVT.getSizeInBits() <= ResVT.getSizeInBits())
7979 return DAG.getNode(N->getOpcode(), DL, ResVT, WideInOp);
7980
7981 // EXTEND_VECTOR_INREG requires input bits <= result bits.
7982 // If widening makes the input larger than the original result, widen the
7983 // result to match, then extract back down.
7984 EVT ResEltVT = ResVT.getVectorElementType();
7985 unsigned EltBits = ResEltVT.getSizeInBits();
7986 assert((WideInVT.getSizeInBits() % EltBits) == 0 &&
7987 "Widened input size must be a multiple of result element size");
7988
7989 unsigned WideNumElts = WideInVT.getSizeInBits() / EltBits;
7990 EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), ResEltVT, WideNumElts);
7991
7992 SDValue WideRes = DAG.getNode(N->getOpcode(), DL, WideResVT, WideInOp);
7993 return DAG.getExtractSubvector(DL, ResVT, WideRes, 0);
7994}
7995
// Widen the value operand of a normal vector store while storing only the
// original vector type. Tries, in order: scalarization (non-byte-sized or
// truncating), a VP_STORE limited to the original element count, chopped
// wide stores, and finally a masked store.
7996SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
7997 // We have to widen the value, but we want only to store the original
7998 // vector type.
7999 StoreSDNode *ST = cast<StoreSDNode>(N);
8000
8001 if (!ST->getMemoryVT().getScalarType().isByteSized())
8002 return TLI.scalarizeVectorStore(ST, DAG);
8003
8004 if (ST->isTruncatingStore())
8005 return TLI.scalarizeVectorStore(ST, DAG);
8006
8007 // Generate a vector-predicated store if it is custom/legal on the target.
8008 // To avoid possible recursion, only do this if the widened mask type is
8009 // legal.
8010 // FIXME: Not all targets may support EVL in VP_STORE. These will have been
8011 // removed from the IR by the ExpandVectorPredication pass but we're
8012 // reintroducing them here.
8013 SDValue StVal = ST->getValue();
8014 EVT StVT = StVal.getValueType();
8015 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
8016 EVT WideMaskVT = getSetCCResultType(WideVT);
8017
8018 if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
8019 TLI.isTypeLegal(WideMaskVT)) {
8020 // Widen the value.
8021 SDLoc DL(N);
8022 StVal = GetWidenedVector(StVal);
      // The EVL is the ORIGINAL element count, so the widening padding is
      // never written to memory.
8023 SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
8024 SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
8025 StVT.getVectorElementCount());
8026 return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(),
8027 ST->getOffset(), Mask, EVL, StVT, ST->getMemOperand(),
8028 ST->getAddressingMode());
8029 }
8030
8032 if (GenWidenVectorStores(StChain, ST)) {
8033 if (StChain.size() == 1)
8034 return StChain[0];
8035
      // Tie the partial stores together so later chain users see all of them.
8036 return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
8037 }
8038
8039 if (StVT.isVector()) {
8040 // If all else fails replace the store with a wide masked store.
8041 SDLoc DL(N);
8042 SDValue WideStVal = GetWidenedVector(StVal);
8043 SDValue Mask =
8044 DAG.getMaskFromElementCount(DL, WideVT, StVT.getVectorElementCount());
8045
8046 return DAG.getMaskedStore(ST->getChain(), DL, WideStVal, ST->getBasePtr(),
8047 ST->getOffset(), Mask, ST->getMemoryVT(),
8048 ST->getMemOperand(), ST->getAddressingMode(),
8049 ST->isTruncatingStore());
8050 }
8051
8052 report_fatal_error("Unable to widen vector store");
8053}
8054
// Widen the data (OpNo == 1) or mask (OpNo == 3) operand of a VP_STORE;
// whichever operand triggered the widening, the other is widened to match so
// data and mask keep identical element counts. The vector length operand is
// unchanged, so the extra lanes are never stored.
8055SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
8056 assert((OpNo == 1 || OpNo == 3) &&
8057 "Can widen only data or mask operand of vp_store");
8058 VPStoreSDNode *ST = cast<VPStoreSDNode>(N);
8059 SDValue Mask = ST->getMask();
8060 SDValue StVal = ST->getValue();
8061 SDLoc dl(N);
8062
8063 if (OpNo == 1) {
8064 // Widen the value.
8065 StVal = GetWidenedVector(StVal);
8066
8067 // We only handle the case where the mask needs widening to an
8068 // identically-sized type as the vector inputs.
8069 assert(getTypeAction(Mask.getValueType()) ==
8071 "Unable to widen VP store");
8072 Mask = GetWidenedVector(Mask);
8073 } else {
8074 Mask = GetWidenedVector(Mask);
8075
8076 // We only handle the case where the stored value needs widening to an
8077 // identically-sized type as the mask.
8078 assert(getTypeAction(StVal.getValueType()) ==
8080 "Unable to widen VP store");
8081 StVal = GetWidenedVector(StVal);
8082 }
8083
8084 assert(Mask.getValueType().getVectorElementCount() ==
8086 "Mask and data vectors should have the same number of elements");
8087 return DAG.getStoreVP(ST->getChain(), dl, StVal, ST->getBasePtr(),
8088 ST->getOffset(), Mask, ST->getVectorLength(),
8089 ST->getMemoryVT(), ST->getMemOperand(),
8090 ST->getAddressingMode(), ST->isTruncatingStore(),
8091 ST->isCompressingStore());
8092}
8093
// Widen the data (OpNo == 1) or mask (OpNo == 4) operand of a strided VP
// store. Both operands are widened together so they stay the same length;
// the unchanged vector length operand keeps the padding lanes inactive.
8094SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N,
8095 unsigned OpNo) {
8096 assert((OpNo == 1 || OpNo == 4) &&
8097 "Can widen only data or mask operand of vp_strided_store");
8098 VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
8099 SDValue Mask = SST->getMask();
8100 SDValue StVal = SST->getValue();
8101 SDLoc DL(N);
8102
    // The operand that did not trigger this call must itself be widenable to
    // the matching type, otherwise the store cannot be reconstructed.
8103 if (OpNo == 1)
8104 assert(getTypeAction(Mask.getValueType()) ==
8106 "Unable to widen VP strided store");
8107 else
8108 assert(getTypeAction(StVal.getValueType()) ==
8110 "Unable to widen VP strided store");
8111
8112 StVal = GetWidenedVector(StVal);
8113 Mask = GetWidenedVector(Mask);
8114
8116 Mask.getValueType().getVectorElementCount() &&
8117 "Data and mask vectors should have the same number of elements");
8118
8119 return DAG.getStridedStoreVP(
8120 SST->getChain(), DL, StVal, SST->getBasePtr(), SST->getOffset(),
8121 SST->getStride(), Mask, SST->getVectorLength(), SST->getMemoryVT(),
8122 SST->getMemOperand(), SST->getAddressingMode(), SST->isTruncatingStore(),
8123 SST->isCompressingStore());
8124}
8125
// Widen the data (OpNo == 1) or mask (OpNo == 4) operand of a masked store.
// Prefers lowering to a VP_STORE whose EVL covers only the original lanes;
// otherwise pads both operands (mask padded with zeroes so the extra lanes
// are inactive) and emits a wide masked store.
8126SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
8127 assert((OpNo == 1 || OpNo == 4) &&
8128 "Can widen only data or mask operand of mstore");
8129 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
8130 SDValue Mask = MST->getMask();
8131 EVT MaskVT = Mask.getValueType();
8132 SDValue StVal = MST->getValue();
8133 EVT VT = StVal.getValueType();
8134 SDLoc dl(N);
8135
    // Derive the widened data/mask types from whichever operand was widened,
    // keeping the two element counts equal.
8136 EVT WideVT, WideMaskVT;
8137 if (OpNo == 1) {
8138 // Widen the value.
8139 StVal = GetWidenedVector(StVal);
8140
8141 WideVT = StVal.getValueType();
8142 WideMaskVT =
8143 EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(),
8144 WideVT.getVectorElementCount());
8145 } else {
8146 WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
8147
8148 EVT ValueVT = StVal.getValueType();
8149 WideVT = EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
8150 WideMaskVT.getVectorElementCount());
8151 }
8152
    // VP_STORE path: the explicit vector length restricts the store to the
    // original lanes, so the mask padding can be poison.
8153 if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
8154 TLI.isTypeLegal(WideMaskVT) && !MST->isCompressingStore()) {
8155 Mask = DAG.getInsertSubvector(dl, DAG.getPOISON(WideMaskVT), Mask, 0);
8156 SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
8158 return DAG.getStoreVP(MST->getChain(), dl, StVal, MST->getBasePtr(),
8159 MST->getOffset(), Mask, EVL, MST->getMemoryVT(),
8160 MST->getMemOperand(), MST->getAddressingMode());
8161 }
8162
8163 if (OpNo == 1) {
8164 // The mask should be widened as well.
8165 Mask = ModifyToType(Mask, WideMaskVT, true);
8166 } else {
8167 // Widen the mask.
8168 Mask = ModifyToType(Mask, WideMaskVT, true);
8169
8170 StVal = ModifyToType(StVal, WideVT);
8171 }
8172
8173 assert(Mask.getValueType().getVectorElementCount() ==
8175 "Mask and data vectors should have the same number of elements");
8176 return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
8177 MST->getOffset(), Mask, MST->getMemoryVT(),
8178 MST->getMemOperand(), MST->getAddressingMode(),
8179 false, MST->isCompressingStore());
8180}
8181
8182SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
8183 assert(OpNo == 4 && "Can widen only the index of mgather");
8184 auto *MG = cast<MaskedGatherSDNode>(N);
8185 SDValue DataOp = MG->getPassThru();
8186 SDValue Mask = MG->getMask();
8187 SDValue Scale = MG->getScale();
8188
8189 // Just widen the index. It's allowed to have extra elements.
8190 SDValue Index = GetWidenedVector(MG->getIndex());
8191
8192 SDLoc dl(N);
8193 SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
8194 Scale};
8195 SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
8196 MG->getMemOperand(), MG->getIndexType(),
8197 MG->getExtensionType());
8198 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
8199 ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
8200 return SDValue();
8201}
8202
8203SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
8204 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
8205 SDValue DataOp = MSC->getValue();
8206 SDValue Mask = MSC->getMask();
8207 SDValue Index = MSC->getIndex();
8208 SDValue Scale = MSC->getScale();
8209 EVT WideMemVT = MSC->getMemoryVT();
8210
8211 if (OpNo == 1) {
8212 DataOp = GetWidenedVector(DataOp);
8213 unsigned NumElts = DataOp.getValueType().getVectorNumElements();
8214
8215 // Widen index.
8216 EVT IndexVT = Index.getValueType();
8217 EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
8218 IndexVT.getVectorElementType(), NumElts);
8219 Index = ModifyToType(Index, WideIndexVT);
8220
8221 // The mask should be widened as well.
8222 EVT MaskVT = Mask.getValueType();
8223 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
8224 MaskVT.getVectorElementType(), NumElts);
8225 Mask = ModifyToType(Mask, WideMaskVT, true);
8226
8227 // Widen the MemoryType
8228 WideMemVT = EVT::getVectorVT(*DAG.getContext(),
8229 MSC->getMemoryVT().getScalarType(), NumElts);
8230 } else if (OpNo == 4) {
8231 // Just widen the index. It's allowed to have extra elements.
8232 Index = GetWidenedVector(Index);
8233 } else
8234 llvm_unreachable("Can't widen this operand of mscatter");
8235
8236 SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index,
8237 Scale};
8238 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N),
8239 Ops, MSC->getMemOperand(), MSC->getIndexType(),
8240 MSC->isTruncatingStore());
8241}
8242
8243SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) {
8244 VPScatterSDNode *VPSC = cast<VPScatterSDNode>(N);
8245 SDValue DataOp = VPSC->getValue();
8246 SDValue Mask = VPSC->getMask();
8247 SDValue Index = VPSC->getIndex();
8248 SDValue Scale = VPSC->getScale();
8249 EVT WideMemVT = VPSC->getMemoryVT();
8250
8251 if (OpNo == 1) {
8252 DataOp = GetWidenedVector(DataOp);
8253 Index = GetWidenedVector(Index);
8254 const auto WideEC = DataOp.getValueType().getVectorElementCount();
8255 Mask = GetWidenedMask(Mask, WideEC);
8256 WideMemVT = EVT::getVectorVT(*DAG.getContext(),
8257 VPSC->getMemoryVT().getScalarType(), WideEC);
8258 } else if (OpNo == 3) {
8259 // Just widen the index. It's allowed to have extra elements.
8260 Index = GetWidenedVector(Index);
8261 } else
8262 llvm_unreachable("Can't widen this operand of VP_SCATTER");
8263
8264 SDValue Ops[] = {
8265 VPSC->getChain(), DataOp, VPSC->getBasePtr(), Index, Scale, Mask,
8266 VPSC->getVectorLength()};
8267 return DAG.getScatterVP(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N), Ops,
8268 VPSC->getMemOperand(), VPSC->getIndexType());
8269}
8270
// Widen both compare operands of a SETCC, compare at the wide width, then
// extract the originally requested lanes and extend them to the result type.
8271SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
8272 SDValue InOp0 = GetWidenedVector(N->getOperand(0));
8273 SDValue InOp1 = GetWidenedVector(N->getOperand(1));
8274 SDLoc dl(N);
8275 EVT VT = N->getValueType(0);
8276
8277 // WARNING: In this code we widen the compare instruction with garbage.
8278 // This garbage may contain denormal floats which may be slow. Is this a real
8279 // concern ? Should we zero the unused lanes if this is a float compare ?
8280
8281 // Get a new SETCC node to compare the newly widened operands.
8282 // Only some of the compared elements are legal.
8283 EVT SVT = getSetCCResultType(InOp0.getValueType());
8284 // The result type is legal, if it's vXi1, keep vXi1 for the new SETCC.
8285 if (VT.getScalarType() == MVT::i1)
8286 SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
8287 SVT.getVectorElementCount());
8288
8289 SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
8290 SVT, InOp0, InOp1, N->getOperand(2));
8291
8292 // Extract the needed results from the result vector.
8293 EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
8296 SDValue CC = DAG.getExtractSubvector(dl, ResVT, WideSETCC, 0);
8297
    // Extend per the target's boolean-contents convention for the operand
    // type so callers see the canonical true/false representation.
8298 EVT OpVT = N->getOperand(0).getValueType();
8299 ISD::NodeType ExtendCode =
8300 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
8301 return DAG.getNode(ExtendCode, dl, VT, CC);
8302}
8303
8304SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) {
8305 SDValue Chain = N->getOperand(0);
8306 SDValue LHS = GetWidenedVector(N->getOperand(1));
8307 SDValue RHS = GetWidenedVector(N->getOperand(2));
8308 SDValue CC = N->getOperand(3);
8309 SDLoc dl(N);
8310
8311 EVT VT = N->getValueType(0);
8312 EVT EltVT = VT.getVectorElementType();
8313 EVT TmpEltVT = LHS.getValueType().getVectorElementType();
8314 unsigned NumElts = VT.getVectorNumElements();
8315
8316 // Unroll into a build vector.
8317 SmallVector<SDValue, 8> Scalars(NumElts);
8318 SmallVector<SDValue, 8> Chains(NumElts);
8319
8320 for (unsigned i = 0; i != NumElts; ++i) {
8321 SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i);
8322 SDValue RHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, RHS, i);
8323
8324 Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
8325 {Chain, LHSElem, RHSElem, CC});
8326 Chains[i] = Scalars[i].getValue(1);
8327 Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i],
8328 DAG.getBoolConstant(true, dl, EltVT, VT),
8329 DAG.getBoolConstant(false, dl, EltVT, VT));
8330 }
8331
8332 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
8333 ReplaceValueWith(SDValue(N, 1), NewChain);
8334
8335 return DAG.getBuildVector(VT, dl, Scalars);
8336}
8337
// Map an integer VECREDUCE opcode to the extension kind that preserves its
// semantics when the scalar start value is extended to the result type.
8338static unsigned getExtendForIntVecReduction(unsigned Opc) {
8339 switch (Opc) {
8340 default:
8341 llvm_unreachable("Expected integer vector reduction");
    // Arithmetic/bitwise reductions only consume the low bits, so any
    // extension of the neutral element works.
8342 case ISD::VECREDUCE_ADD:
8343 case ISD::VECREDUCE_MUL:
8344 case ISD::VECREDUCE_AND:
8345 case ISD::VECREDUCE_OR:
8346 case ISD::VECREDUCE_XOR:
8347 return ISD::ANY_EXTEND;
    // Signed min/max reductions compare signed values.
8350 return ISD::SIGN_EXTEND;
    // Unsigned min/max reductions compare unsigned values.
8353 return ISD::ZERO_EXTEND;
8354 }
8355}
8356
// Widen the vector operand of a VECREDUCE. Prefers a VP reduction whose EVL
// masks off the padding; otherwise pads the extra lanes with the operation's
// neutral element (via subvector splats for scalable types) and reduces wide.
8357SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
8358 SDLoc dl(N);
8359 SDValue Op = GetWidenedVector(N->getOperand(0));
8360 EVT VT = N->getValueType(0);
8361 EVT OrigVT = N->getOperand(0).getValueType();
8362 EVT WideVT = Op.getValueType();
8363 EVT ElemVT = OrigVT.getVectorElementType();
8364 SDNodeFlags Flags = N->getFlags();
8365
8366 unsigned Opc = N->getOpcode();
8367 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc);
8368 SDValue NeutralElem = DAG.getIdentityElement(BaseOpc, dl, ElemVT, Flags);
8369 assert(NeutralElem && "Neutral element must exist");
8370
8371 // Pad the vector with the neutral element.
8372 unsigned OrigElts = OrigVT.getVectorMinNumElements();
8373 unsigned WideElts = WideVT.getVectorMinNumElements();
8374
8375 // Generate a vp.reduce_op if it is custom/legal for the target. This avoids
8376 // needing to pad the source vector, because the inactive lanes can simply be
8377 // disabled and not contribute to the result.
8378 if (auto VPOpcode = ISD::getVPForBaseOpcode(Opc);
8379 VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WideVT)) {
      // The VP form takes a scalar start value of the result type, so the
      // neutral element may need extending first.
8380 SDValue Start = NeutralElem;
8381 if (VT.isInteger())
8382 Start = DAG.getNode(getExtendForIntVecReduction(Opc), dl, VT, Start);
8383 assert(Start.getValueType() == VT);
8384 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
8385 WideVT.getVectorElementCount());
8386 SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
8387 SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
8388 OrigVT.getVectorElementCount());
8389 return DAG.getNode(*VPOpcode, dl, VT, {Start, Op, Mask, EVL}, Flags);
8390 }
8391
8392 if (WideVT.isScalableVector()) {
      // Scalable vectors cannot insert single elements at fixed positions, so
      // splat the neutral element into GCD-sized chunks and insert those.
8393 unsigned GCD = std::gcd(OrigElts, WideElts);
8394 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
8396 SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
8397 for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
8398 Op = DAG.getInsertSubvector(dl, Op, SplatNeutral, Idx)
8399 return DAG.getNode(Opc, dl, VT, Op, Flags);
8400 }
8401
8402 for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
8403 Op = DAG.getInsertVectorElt(dl, Op, NeutralElem, Idx);
8404
8405 return DAG.getNode(Opc, dl, VT, Op, Flags);
8406}
8407
// Widen the vector operand of a sequential (ordered) VECREDUCE. Same plan as
// WidenVecOp_VECREDUCE, but threading the explicit accumulator operand.
8408SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
8409 SDLoc dl(N);
8410 SDValue AccOp = N->getOperand(0);
8411 SDValue VecOp = N->getOperand(1);
8412 SDValue Op = GetWidenedVector(VecOp);
8413
8414 EVT VT = N->getValueType(0);
8415 EVT OrigVT = VecOp.getValueType();
8416 EVT WideVT = Op.getValueType();
8417 EVT ElemVT = OrigVT.getVectorElementType();
8418 SDNodeFlags Flags = N->getFlags();
8419
8420 unsigned Opc = N->getOpcode();
8421 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc);
8422 SDValue NeutralElem = DAG.getIdentityElement(BaseOpc, dl, ElemVT, Flags);
8423
8424 // Pad the vector with the neutral element.
8425 unsigned OrigElts = OrigVT.getVectorMinNumElements();
8426 unsigned WideElts = WideVT.getVectorMinNumElements();
8427
8428 // Generate a vp.reduce_op if it is custom/legal for the target. This avoids
8429 // needing to pad the source vector, because the inactive lanes can simply be
8430 // disabled and not contribute to the result.
8431 if (auto VPOpcode = ISD::getVPForBaseOpcode(Opc);
8432 VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WideVT)) {
8433 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
8434 WideVT.getVectorElementCount());
8435 SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
8436 SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
8437 OrigVT.getVectorElementCount());
8438 return DAG.getNode(*VPOpcode, dl, VT, {AccOp, Op, Mask, EVL}, Flags);
8439 }
8440
8441 if (WideVT.isScalableVector()) {
      // Scalable vectors cannot insert single elements at fixed positions, so
      // splat the neutral element into GCD-sized chunks and insert those.
8442 unsigned GCD = std::gcd(OrigElts, WideElts);
8443 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
8445 SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
8446 for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
8447 Op = DAG.getInsertSubvector(dl, Op, SplatNeutral, Idx);
8448 return DAG.getNode(Opc, dl, VT, AccOp, Op, Flags);
8449 }
8450
8451 for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
8452 Op = DAG.getInsertVectorElt(dl, Op, NeutralElem, Idx);
8453
8454 return DAG.getNode(Opc, dl, VT, AccOp, Op, Flags);
8455}
8456
8457SDValue DAGTypeLegalizer::WidenVecOp_VP_REDUCE(SDNode *N) {
8458 assert(N->isVPOpcode() && "Expected VP opcode");
8459
8460 SDLoc dl(N);
8461 SDValue Op = GetWidenedVector(N->getOperand(1));
8462 SDValue Mask = GetWidenedMask(N->getOperand(2),
8463 Op.getValueType().getVectorElementCount());
8464
8465 return DAG.getNode(N->getOpcode(), dl, N->getValueType(0),
8466 {N->getOperand(0), Op, Mask, N->getOperand(3)},
8467 N->getFlags());
8468}
8469
8470SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
8471 // This only gets called in the case that the left and right inputs and
8472 // result are of a legal odd vector type, and the condition is illegal i1 of
8473 // the same odd width that needs widening.
8474 EVT VT = N->getValueType(0);
8475 assert(VT.isVector() && !VT.isPow2VectorType() && isTypeLegal(VT));
8476
8477 SDValue Cond = GetWidenedVector(N->getOperand(0));
8478 SDValue LeftIn = DAG.WidenVector(N->getOperand(1), SDLoc(N));
8479 SDValue RightIn = DAG.WidenVector(N->getOperand(2), SDLoc(N));
8480 SDLoc DL(N);
8481
8482 SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond,
8483 LeftIn, RightIn);
8484 return DAG.getExtractSubvector(DL, VT, Select, 0);
8485}
8486
8487SDValue DAGTypeLegalizer::WidenVecOp_VP_CttzElements(SDNode *N) {
8488 SDLoc DL(N);
8489 SDValue Source = GetWidenedVector(N->getOperand(0));
8490 EVT SrcVT = Source.getValueType();
8491 SDValue Mask =
8492 GetWidenedMask(N->getOperand(1), SrcVT.getVectorElementCount());
8493
8494 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0),
8495 {Source, Mask, N->getOperand(2)}, N->getFlags());
8496}
8497
8498SDValue DAGTypeLegalizer::WidenVecOp_VECTOR_FIND_LAST_ACTIVE(SDNode *N) {
8499 SDLoc DL(N);
8500 SDValue Mask = N->getOperand(0);
8501 EVT OrigMaskVT = Mask.getValueType();
8502 SDValue WideMask = GetWidenedVector(Mask);
8503 EVT WideMaskVT = WideMask.getValueType();
8504
8505 // Pad the mask with zeros to ensure inactive lanes don't affect the result.
8506 unsigned OrigElts = OrigMaskVT.getVectorNumElements();
8507 unsigned WideElts = WideMaskVT.getVectorNumElements();
8508 if (OrigElts != WideElts) {
8509 SDValue ZeroMask = DAG.getConstant(0, DL, WideMaskVT);
8510 WideMask = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideMaskVT, ZeroMask,
8511 Mask, DAG.getVectorIdxConstant(0, DL));
8512 }
8513
8514 return DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, N->getValueType(0),
8515 WideMask);
8516}
8517
8518//===----------------------------------------------------------------------===//
8519// Vector Widening Utilities
8520//===----------------------------------------------------------------------===//
8521
8522// Utility function to find the type to chop up a widen vector for load/store
8523// TLI: Target lowering used to determine legal types.
8524// Width: the number of bits remaining to load/store.
8525// WidenVT: The widen vector type to load to/store from
8526// Align: If 0, don't allow use of a wider type
8527// WidenEx: If Align is not 0, the amount additional we can load/store from.
8528
// Returns the widest legal memory type usable for the next chunk of a
// chopped widen-vector load/store, or std::nullopt if a scalable vector has
// no usable chunk type (element-wise access is unsupported for scalable).
8529static std::optional<EVT> findMemType(SelectionDAG &DAG,
8530 const TargetLowering &TLI, unsigned Width,
8531 EVT WidenVT, unsigned Align = 0,
8532 unsigned WidenEx = 0) {
8533 EVT WidenEltVT = WidenVT.getVectorElementType();
8534 const bool Scalable = WidenVT.isScalableVector();
8535 unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinValue();
8536 unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
8537 unsigned AlignInBits = Align*8;
8538
    // Default candidate: a single scalar element.
8539 EVT RetVT = WidenEltVT;
8540 // Don't bother looking for an integer type if the vector is scalable, skip
8541 // to vector types.
8542 if (!Scalable) {
8543 // If we have one element to load/store, return it.
8544 if (Width == WidenEltWidth)
8545 return RetVT;
8546
8547 // See if there is larger legal integer than the element type to load/store.
8548 for (EVT MemVT : reverse(MVT::integer_valuetypes())) {
8549 unsigned MemVTWidth = MemVT.getSizeInBits();
8550 if (MemVT.getSizeInBits() <= WidenEltWidth)
8551 break;
8552 auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
      // Candidate must evenly divide the widened width into a power-of-two
      // number of chunks, and must not read/write past the remaining Width
      // unless alignment guarantees the overhang cannot fault.
8553 if ((Action == TargetLowering::TypeLegal ||
8555 (WidenWidth % MemVTWidth) == 0 &&
8556 isPowerOf2_32(WidenWidth / MemVTWidth) &&
8557 (MemVTWidth <= Width ||
8558 (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
8559 if (MemVTWidth == WidenWidth)
8560 return MemVT;
8561 RetVT = MemVT;
8562 break;
8563 }
8564 }
8565 }
8566
8567 // See if there is a larger vector type to load/store that has the same vector
8568 // element type and is evenly divisible with the WidenVT.
8569 for (EVT MemVT : reverse(MVT::vector_valuetypes())) {
8570 // Skip vector MVTs which don't match the scalable property of WidenVT.
8571 if (Scalable != MemVT.isScalableVector())
8572 continue;
8573 unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinValue();
8574 auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
8575 if ((Action == TargetLowering::TypeLegal ||
8577 WidenEltVT == MemVT.getVectorElementType() &&
8578 (WidenWidth % MemVTWidth) == 0 &&
8579 isPowerOf2_32(WidenWidth / MemVTWidth) &&
8580 (MemVTWidth <= Width ||
8581 (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
      // Prefer a vector candidate only if it beats the integer candidate
      // found above (or exactly matches the widened type).
8582 if (RetVT.getFixedSizeInBits() < MemVTWidth || MemVT == WidenVT)
8583 return MemVT;
8584 }
8585 }
8586
8587 // Using element-wise loads and stores for widening operations is not
8588 // supported for scalable vectors.
8589 if (Scalable)
8590 return std::nullopt;
8591
8592 return RetVT;
8593}
8594
8595// Builds a vector type from scalar loads.
8596// VecTy: Resulting Vector type
8597// LDOps: Load operators to build a vector type
8598// [Start,End) the list of loads to use.
// NOTE(review): the function signature lines are not visible here; from the
// body this takes the DAG, VecTy, the LdOps list, and the [Start,End) range.
8601 unsigned Start, unsigned End) {
8602 SDLoc dl(LdOps[Start]);
8603 EVT LdTy = LdOps[Start].getValueType();
8604 unsigned Width = VecTy.getSizeInBits();
8605 unsigned NumElts = Width / LdTy.getSizeInBits();
    // Working vector typed to the element size of the loads seen so far.
8606 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
8607
8608 unsigned Idx = 1;
8609 SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
8610
8611 for (unsigned i = Start + 1; i != End; ++i) {
8612 EVT NewLdTy = LdOps[i].getValueType();
    // When a load of a different width appears, bitcast the working vector
    // to the new element type and rescale the insertion index to match.
8613 if (NewLdTy != LdTy) {
8614 NumElts = Width / NewLdTy.getSizeInBits();
8615 NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
8616 VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
8617 // Readjust position and vector position based on new load type.
8618 Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
8619 LdTy = NewLdTy;
8620 }
8621 VecOp = DAG.getInsertVectorElt(dl, VecOp, LdOps[i], Idx++);
8622 }
8623 return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
8624}
8625
8626SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
8627 LoadSDNode *LD) {
8628 // The strategy assumes that we can efficiently load power-of-two widths.
8629 // The routine chops the vector into the largest vector loads with the same
8630 // element type or scalar loads and then recombines it to the widen vector
8631 // type.
8632 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
8633 EVT LdVT = LD->getMemoryVT();
8634 SDLoc dl(LD);
8635 assert(LdVT.isVector() && WidenVT.isVector());
8636 assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
8638
8639 // Load information
8640 SDValue Chain = LD->getChain();
8641 SDValue BasePtr = LD->getBasePtr();
8642 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
8643 AAMDNodes AAInfo = LD->getAAInfo();
8644
8645 TypeSize LdWidth = LdVT.getSizeInBits();
8646 TypeSize WidenWidth = WidenVT.getSizeInBits();
8647 TypeSize WidthDiff = WidenWidth - LdWidth;
8648 // Allow wider loads if they are sufficiently aligned to avoid memory faults
8649 // and if the original load is simple.
8650 unsigned LdAlign =
8651 (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value();
8652
8653 // Find the vector type that can load from.
8654 std::optional<EVT> FirstVT =
8655 findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, LdAlign,
8656 WidthDiff.getKnownMinValue());
8657
8658 if (!FirstVT)
8659 return SDValue();
8660
8661 SmallVector<EVT, 8> MemVTs;
8662 TypeSize FirstVTWidth = FirstVT->getSizeInBits();
8663
8664 // Unless we're able to load in one instruction we must work out how to load
8665 // the remainder.
8666 if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) {
8667 std::optional<EVT> NewVT = FirstVT;
8668 TypeSize RemainingWidth = LdWidth;
8669 TypeSize NewVTWidth = FirstVTWidth;
8670 do {
8671 RemainingWidth -= NewVTWidth;
8672 if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) {
8673 // The current type we are using is too large. Find a better size.
8674 NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinValue(),
8675 WidenVT, LdAlign, WidthDiff.getKnownMinValue());
8676 if (!NewVT)
8677 return SDValue();
8678 NewVTWidth = NewVT->getSizeInBits();
8679 }
8680 MemVTs.push_back(*NewVT);
8681 } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
8682 }
8683
8684 SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
8685 LD->getBaseAlign(), MMOFlags, AAInfo);
8686 LdChain.push_back(LdOp.getValue(1));
8687
8688 // Check if we can load the element with one instruction.
8689 if (MemVTs.empty())
8690 return coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl,
8691 DAG);
8692
8693 // Load vector by using multiple loads from largest vector to scalar.
8695 LdOps.push_back(LdOp);
8696
8697 uint64_t ScaledOffset = 0;
8698 MachinePointerInfo MPI = LD->getPointerInfo();
8699
8700 // First incremement past the first load.
8701 IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr,
8702 &ScaledOffset);
8703
8704 for (EVT MemVT : MemVTs) {
8705 Align NewAlign = ScaledOffset == 0
8706 ? LD->getBaseAlign()
8707 : commonAlignment(LD->getAlign(), ScaledOffset);
8708 SDValue L =
8709 DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
8710
8711 LdOps.push_back(L);
8712 LdChain.push_back(L.getValue(1));
8713 IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset);
8714 }
8715
8716 // Build the vector from the load operations.
8717 unsigned End = LdOps.size();
8718 if (!LdOps[0].getValueType().isVector())
8719 // All the loads are scalar loads.
8720 return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
8721
8722 // If the load contains vectors, build the vector using concat vector.
8723 // All of the vectors used to load are power-of-2, and the scalar loads can be
8724 // combined to make a power-of-2 vector.
8725 SmallVector<SDValue, 16> ConcatOps(End);
8726 int i = End - 1;
8727 int Idx = End;
8728 EVT LdTy = LdOps[i].getValueType();
8729 // First, combine the scalar loads to a vector.
8730 if (!LdTy.isVector()) {
8731 for (--i; i >= 0; --i) {
8732 LdTy = LdOps[i].getValueType();
8733 if (LdTy.isVector())
8734 break;
8735 }
8736 ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
8737 }
8738
8739 ConcatOps[--Idx] = LdOps[i];
8740 for (--i; i >= 0; --i) {
8741 EVT NewLdTy = LdOps[i].getValueType();
8742 if (NewLdTy != LdTy) {
8743 // Create a larger vector.
8744 TypeSize LdTySize = LdTy.getSizeInBits();
8745 TypeSize NewLdTySize = NewLdTy.getSizeInBits();
8746 assert(NewLdTySize.isScalable() == LdTySize.isScalable() &&
8747 NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinValue()));
8748 unsigned NumOps =
8749 NewLdTySize.getKnownMinValue() / LdTySize.getKnownMinValue();
8751 unsigned j = 0;
8752 for (; j != End-Idx; ++j)
8753 WidenOps[j] = ConcatOps[Idx+j];
8754 for (; j != NumOps; ++j)
8755 WidenOps[j] = DAG.getPOISON(LdTy);
8756
8757 ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
8758 WidenOps);
8759 Idx = End - 1;
8760 LdTy = NewLdTy;
8761 }
8762 ConcatOps[--Idx] = LdOps[i];
8763 }
8764
8765 if (WidenWidth == LdTy.getSizeInBits() * (End - Idx))
8766 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
8767 ArrayRef(&ConcatOps[Idx], End - Idx));
8768
8769 // We need to fill the rest with undefs to build the vector.
8770 unsigned NumOps =
8771 WidenWidth.getKnownMinValue() / LdTy.getSizeInBits().getKnownMinValue();
8773 SDValue UndefVal = DAG.getPOISON(LdTy);
8774 {
8775 unsigned i = 0;
8776 for (; i != End-Idx; ++i)
8777 WidenOps[i] = ConcatOps[Idx+i];
8778 for (; i != NumOps; ++i)
8779 WidenOps[i] = UndefVal;
8780 }
8781 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps);
8782}
8783
8784SDValue
8785DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
8786 LoadSDNode *LD,
8787 ISD::LoadExtType ExtType) {
8788 // For extension loads, it may not be more efficient to chop up the vector
8789 // and then extend it. Instead, we unroll the load and build a new vector.
8790 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
8791 EVT LdVT = LD->getMemoryVT();
8792 SDLoc dl(LD);
8793 assert(LdVT.isVector() && WidenVT.isVector());
8794 assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
8795
8796 // Load information
8797 SDValue Chain = LD->getChain();
8798 SDValue BasePtr = LD->getBasePtr();
8799 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
8800 AAMDNodes AAInfo = LD->getAAInfo();
8801
8802 if (LdVT.isScalableVector())
8803 return SDValue();
8804
8805 EVT EltVT = WidenVT.getVectorElementType();
8806 EVT LdEltVT = LdVT.getVectorElementType();
8807 unsigned NumElts = LdVT.getVectorNumElements();
8808
8809 // Load each element and widen.
8810 unsigned WidenNumElts = WidenVT.getVectorNumElements();
8811 SmallVector<SDValue, 16> Ops(WidenNumElts);
8812 unsigned Increment = LdEltVT.getSizeInBits() / 8;
8813 Ops[0] =
8814 DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(),
8815 LdEltVT, LD->getBaseAlign(), MMOFlags, AAInfo);
8816 LdChain.push_back(Ops[0].getValue(1));
8817 unsigned i = 0, Offset = Increment;
8818 for (i=1; i < NumElts; ++i, Offset += Increment) {
8819 SDValue NewBasePtr =
8820 DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::getFixed(Offset));
8821 Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
8822 LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
8823 LD->getBaseAlign(), MMOFlags, AAInfo);
8824 LdChain.push_back(Ops[i].getValue(1));
8825 }
8826
8827 // Fill the rest with undefs.
8828 SDValue UndefVal = DAG.getPOISON(EltVT);
8829 for (; i != WidenNumElts; ++i)
8830 Ops[i] = UndefVal;
8831
8832 return DAG.getBuildVector(WidenVT, dl, Ops);
8833}
8834
8835bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
8836 StoreSDNode *ST) {
8837 // The strategy assumes that we can efficiently store power-of-two widths.
8838 // The routine chops the vector into the largest vector stores with the same
8839 // element type or scalar stores.
8840 SDValue Chain = ST->getChain();
8841 SDValue BasePtr = ST->getBasePtr();
8842 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
8843 AAMDNodes AAInfo = ST->getAAInfo();
8844 SDValue ValOp = GetWidenedVector(ST->getValue());
8845 SDLoc dl(ST);
8846
8847 EVT StVT = ST->getMemoryVT();
8848 TypeSize StWidth = StVT.getSizeInBits();
8849 EVT ValVT = ValOp.getValueType();
8850 TypeSize ValWidth = ValVT.getSizeInBits();
8851 EVT ValEltVT = ValVT.getVectorElementType();
8852 unsigned ValEltWidth = ValEltVT.getFixedSizeInBits();
8853 assert(StVT.getVectorElementType() == ValEltVT);
8854 assert(StVT.isScalableVector() == ValVT.isScalableVector() &&
8855 "Mismatch between store and value types");
8856
8857 int Idx = 0; // current index to store
8858
8859 MachinePointerInfo MPI = ST->getPointerInfo();
8860 uint64_t ScaledOffset = 0;
8861
8862 // A breakdown of how to widen this vector store. Each element of the vector
8863 // is a memory VT combined with the number of times it is to be stored to,
8864 // e,g., v5i32 -> {{v2i32,2},{i32,1}}
8866
8867 while (StWidth.isNonZero()) {
8868 // Find the largest vector type we can store with.
8869 std::optional<EVT> NewVT =
8870 findMemType(DAG, TLI, StWidth.getKnownMinValue(), ValVT);
8871 if (!NewVT)
8872 return false;
8873 MemVTs.push_back({*NewVT, 0});
8874 TypeSize NewVTWidth = NewVT->getSizeInBits();
8875
8876 do {
8877 StWidth -= NewVTWidth;
8878 MemVTs.back().second++;
8879 } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
8880 }
8881
8882 for (const auto &Pair : MemVTs) {
8883 EVT NewVT = Pair.first;
8884 unsigned Count = Pair.second;
8885 TypeSize NewVTWidth = NewVT.getSizeInBits();
8886
8887 if (NewVT.isVector()) {
8888 unsigned NumVTElts = NewVT.getVectorMinNumElements();
8889 do {
8890 Align NewAlign = ScaledOffset == 0
8891 ? ST->getBaseAlign()
8892 : commonAlignment(ST->getAlign(), ScaledOffset);
8893 SDValue EOp = DAG.getExtractSubvector(dl, NewVT, ValOp, Idx);
8894 SDValue PartStore = DAG.getStore(Chain, dl, EOp, BasePtr, MPI, NewAlign,
8895 MMOFlags, AAInfo);
8896 StChain.push_back(PartStore);
8897
8898 Idx += NumVTElts;
8899 IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,
8900 &ScaledOffset);
8901 } while (--Count);
8902 } else {
8903 // Cast the vector to the scalar type we can store.
8904 unsigned NumElts = ValWidth.getFixedValue() / NewVTWidth.getFixedValue();
8905 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
8906 SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
8907 // Readjust index position based on new vector type.
8908 Idx = Idx * ValEltWidth / NewVTWidth.getFixedValue();
8909 do {
8910 SDValue EOp = DAG.getExtractVectorElt(dl, NewVT, VecOp, Idx++);
8911 SDValue PartStore = DAG.getStore(Chain, dl, EOp, BasePtr, MPI,
8912 ST->getBaseAlign(), MMOFlags, AAInfo);
8913 StChain.push_back(PartStore);
8914
8915 IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
8916 } while (--Count);
8917 // Restore index back to be relative to the original widen element type.
8918 Idx = Idx * NewVTWidth.getFixedValue() / ValEltWidth;
8919 }
8920 }
8921
8922 return true;
8923}
8924
8925/// Modifies a vector input (widen or narrows) to a vector of NVT. The
8926/// input vector must have the same element type as NVT.
8927/// FillWithZeroes specifies that the vector should be widened with zeroes.
8928SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
8929 bool FillWithZeroes) {
8930 // Note that InOp might have been widened so it might already have
8931 // the right width or it might need be narrowed.
8932 EVT InVT = InOp.getValueType();
8934 "input and widen element type must match");
8935 assert(InVT.isScalableVector() == NVT.isScalableVector() &&
8936 "cannot modify scalable vectors in this way");
8937 SDLoc dl(InOp);
8938
8939 // Check if InOp already has the right width.
8940 if (InVT == NVT)
8941 return InOp;
8942
8943 ElementCount InEC = InVT.getVectorElementCount();
8944 ElementCount WidenEC = NVT.getVectorElementCount();
8945 if (WidenEC.hasKnownScalarFactor(InEC)) {
8946 unsigned NumConcat = WidenEC.getKnownScalarFactor(InEC);
8947 SmallVector<SDValue, 16> Ops(NumConcat);
8948 SDValue FillVal =
8949 FillWithZeroes ? DAG.getConstant(0, dl, InVT) : DAG.getPOISON(InVT);
8950 Ops[0] = InOp;
8951 for (unsigned i = 1; i != NumConcat; ++i)
8952 Ops[i] = FillVal;
8953
8954 return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
8955 }
8956
8957 if (InEC.hasKnownScalarFactor(WidenEC))
8958 return DAG.getExtractSubvector(dl, NVT, InOp, 0);
8959
8960 assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
8961 "Scalable vectors should have been handled already.");
8962
8963 unsigned InNumElts = InEC.getFixedValue();
8964 unsigned WidenNumElts = WidenEC.getFixedValue();
8965
8966 // Fall back to extract and build (+ mask, if padding with zeros).
8967 SmallVector<SDValue, 16> Ops(WidenNumElts);
8968 EVT EltVT = NVT.getVectorElementType();
8969 unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
8970 unsigned Idx;
8971 for (Idx = 0; Idx < MinNumElts; ++Idx)
8972 Ops[Idx] = DAG.getExtractVectorElt(dl, EltVT, InOp, Idx);
8973
8974 SDValue UndefVal = DAG.getPOISON(EltVT);
8975 for (; Idx < WidenNumElts; ++Idx)
8976 Ops[Idx] = UndefVal;
8977
8978 SDValue Widened = DAG.getBuildVector(NVT, dl, Ops);
8979 if (!FillWithZeroes)
8980 return Widened;
8981
8982 assert(NVT.isInteger() &&
8983 "We expect to never want to FillWithZeroes for non-integral types.");
8984
8986 MaskOps.append(MinNumElts, DAG.getAllOnesConstant(dl, EltVT));
8987 MaskOps.append(WidenNumElts - MinNumElts, DAG.getConstant(0, dl, EltVT));
8988
8989 return DAG.getNode(ISD::AND, dl, NVT, Widened,
8990 DAG.getBuildVector(NVT, dl, MaskOps));
8991}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static constexpr Value * getValue(Ty &ValueOrUse)
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static unsigned getExtendForIntVecReduction(SDNode *N)
static SDValue BuildVectorFromScalar(SelectionDAG &DAG, EVT VecTy, SmallVectorImpl< SDValue > &LdOps, unsigned Start, unsigned End)
static std::optional< EVT > findMemType(SelectionDAG &DAG, const TargetLowering &TLI, unsigned Width, EVT WidenVT, unsigned Align, unsigned WidenEx)
static EVT getSETCCOperandType(SDValue N)
static bool isSETCCOp(unsigned Opcode)
static bool isLogicalMaskOp(unsigned Opcode)
static bool isSETCCorConvertedSETCC(SDValue N)
static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI, SmallVectorImpl< SDValue > &ConcatOps, unsigned ConcatEnd, EVT VT, EVT MaxVT, EVT WidenVT)
static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT, TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl, SelectionDAG &DAG)
Either return the same load or provide appropriate casts from the load and return that.
#define I(x, y, z)
Definition MD5.cpp:57
static bool isUndef(const MachineInstr &MI)
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
uint64_t High
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
static Type * getValueType(Value *V, bool LookThroughCmp=false)
Returns the "element type" of the given value/instruction V.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
Value * RHS
Value * LHS
This is an SDNode representing atomic operations.
LLVM_ABI unsigned getVScaleRangeMin() const
Returns the minimum value for the vscale_range attribute.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
This class is used to represent ISD::LOAD nodes.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
static auto integer_valuetypes()
static auto vector_valuetypes()
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
This class is used to represent an MGATHER node.
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
This is an abstract virtual class for memory operations.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isStrictFPOpcode()
Test if this node is a strict floating point pseudo-op.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVMContext * getContext() const
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
Vector takeVector()
Clear the SetVector and return the underlying vector.
Definition SetVector.h:94
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
BooleanContent
Enum that describes how the target represents true/false values.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent a VP_LOAD node.
const SDValue & getValue() const
This class is used to represent a VP_STORE node.
This class is used to represent an EXPERIMENTAL_VP_STRIDED_LOAD node.
const SDValue & getMask() const
ISD::LoadExtType getExtensionType() const
const SDValue & getStride() const
const SDValue & getOffset() const
const SDValue & getVectorLength() const
const SDValue & getBasePtr() const
This class is used to represent an EXPERIMENTAL_VP_STRIDED_STORE node.
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if this is a truncating store.
const SDValue & getOffset() const
const SDValue & getVectorLength() const
const SDValue & getStride() const
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
Definition TypeSize.h:269
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr bool isNonZero() const
Definition TypeSize.h:155
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
Definition TypeSize.h:277
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr bool isKnownEven() const
A return value of true indicates we know at compile time that the number of elements (vscale * Min) i...
Definition TypeSize.h:176
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:237
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ POISON
POISON - A poison node.
Definition ISDOpcodes.h:236
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ LOOP_DEPENDENCE_RAW_MASK
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ CTTZ_ELTS
Returns the number of number of trailing (least significant) zero elements in a vector.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ FMODF
FMODF - Decomposes the operand into integral and fractional parts, each having the same type and sign...
@ FATAN2
FATAN2 - atan2, inspired by libm.
@ FSINCOSPI
FSINCOSPI - Compute both the sine and cosine times pi more accurately than FSINCOS(pi*x),...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:914
@ FPTRUNC_ROUND
FPTRUNC_ROUND - This corresponds to the fptrunc_round intrinsic.
Definition ISDOpcodes.h:515
@ FAKE_USE
FAKE_USE represents a use of the operand but does not do anything.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:778
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ CONVERT_FROM_ARBITRARY_FP
CONVERT_FROM_ARBITRARY_FP - This operator converts from an arbitrary floating-point represented as an...
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:792
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ PARTIAL_REDUCE_FMLA
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:635
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:691
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ GET_ACTIVE_LANE_MASK
GET_ACTIVE_LANE_MASK - this corresponds to the llvm.get.active.lane.mask intrinsic.
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ ARITH_FENCE
ARITH_FENCE - This corresponds to an arithmetic fence intrinsic.
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ AssertNoFPClass
AssertNoFPClass - These nodes record if a register contains a float value that is known to be not som...
Definition ISDOpcodes.h:78
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:903
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ MASKED_UDIV
Masked vector arithmetic that returns poison on disabled lanes.
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:640
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ SCMP
[US]CMP - 3-way comparison of signed or unsigned integers.
Definition ISDOpcodes.h:735
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:791
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:699
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:925
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ EXPERIMENTAL_VECTOR_HISTOGRAM
Experimental vector histogram intrinsic Operands: Input Chain, Inc, Mask, Base, Index,...
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:949
@ VECREDUCE_FMINIMUM
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ VECREDUCE_SEQ_FMUL
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition ISDOpcodes.h:624
@ CTTZ_ELTS_ZERO_POISON
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ ABS_MIN_POISON
ABS with a poison result for INT_MIN.
Definition ISDOpcodes.h:751
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
@ LOOP_DEPENDENCE_WAR_MASK
The llvm.loop.dependence.
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getUnmaskedBinOpOpcode(unsigned MaskedOpc)
Given a MaskedOpc of ISD::MASKED_(U|S)(DIV|REM), returns the unmasked ISD::(U|S)(DIV|REM).
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
LLVM_ABI std::optional< unsigned > getVPForBaseOpcode(unsigned Opcode)
Translate this non-VP Opcode to its corresponding VP Opcode.
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
constexpr double e
Context & getContext() const
Definition BasicBlock.h:99
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr int PoisonMaskElem
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1884
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:479
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:486
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition ValueTypes.h:460
bool isFixedLengthVector() const
Definition ValueTypes.h:189
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:427
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition ValueTypes.h:264
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition ValueTypes.h:276
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.