LLVM 23.0.0git
VPlanUtils.cpp
Go to the documentation of this file.
1//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "VPlanUtils.h"
10#include "VPlanAnalysis.h"
11#include "VPlanCFG.h"
12#include "VPlanDominatorTree.h"
13#include "VPlanPatternMatch.h"
14#include "llvm/ADT/TypeSwitch.h"
18
19using namespace llvm;
20using namespace llvm::VPlanPatternMatch;
21using namespace llvm::SCEVPatternMatch;
22
24 return all_of(Def->users(),
25 [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); });
26}
27
29 return all_of(Def->users(),
30 [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); });
31}
32
34 return all_of(Def->users(),
35 [Def](const VPUser *U) { return U->usesScalars(Def); });
36}
37
39 if (auto *E = dyn_cast<SCEVConstant>(Expr))
40 return Plan.getOrAddLiveIn(E->getValue());
41 // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
42 // value. Otherwise the value may be defined in a loop and using it directly
43 // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
44 // form.
45 auto *U = dyn_cast<SCEVUnknown>(Expr);
46 if (U && !isa<Instruction>(U->getValue()))
47 return Plan.getOrAddLiveIn(U->getValue());
48 auto *Expanded = new VPExpandSCEVRecipe(Expr);
49 Plan.getEntry()->appendRecipe(Expanded);
50 return Expanded;
51}
52
53bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
55 return true;
56
57 auto IsWideCanonicalIV = [](VPValue *A) {
61 };
62
63 VPValue *A, *B;
64
65 auto m_CanonicalScalarIVSteps = m_ScalarIVSteps(
68 m_One(), m_Specific(&Plan.getVF()));
69
71 return B == Plan.getTripCount() &&
72 (match(A, m_CanonicalScalarIVSteps) || IsWideCanonicalIV(A));
73
74 // For scalar plans, the header mask uses the scalar steps.
75 if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
77 assert(Plan.hasScalarVFOnly() &&
78 "Non-scalar VF using scalar IV steps for header mask?");
79 return true;
80 }
81
82 return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
83 B == Plan.getBackedgeTakenCount();
84}
85
86/// Returns true if \p R propagates poison from any operand to its result.
90 [](const VPRecipeBase *) { return true; })
91 .Case([](const VPReplicateRecipe *Rep) {
92 // GEP and casts propagate poison from all operands.
93 unsigned Opcode = Rep->getOpcode();
94 return Opcode == Instruction::GetElementPtr ||
95 Instruction::isCast(Opcode);
96 })
97 .Default([](const VPRecipeBase *) { return false; });
98}
99
100/// Returns true if \p V being poison is guaranteed to trigger UB because it
101/// propagates to the address of a memory recipe.
102static bool poisonGuaranteesUB(const VPValue *V) {
105
106 Worklist.push_back(V);
107
108 while (!Worklist.empty()) {
109 const VPValue *Current = Worklist.pop_back_val();
110 if (!Visited.insert(Current).second)
111 continue;
112
113 for (VPUser *U : Current->users()) {
114 // Check if Current is used as an address operand for load/store.
115 if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
116 if (MemR->getAddr() == Current)
117 return true;
118 continue;
119 }
120 if (auto *Rep = dyn_cast<VPReplicateRecipe>(U)) {
121 unsigned Opcode = Rep->getOpcode();
122 if ((Opcode == Instruction::Load && Rep->getOperand(0) == Current) ||
123 (Opcode == Instruction::Store && Rep->getOperand(1) == Current))
124 return true;
125 }
126
127 // Check if poison propagates through this recipe to any of its users.
128 auto *R = cast<VPRecipeBase>(U);
129 for (const VPValue *Op : R->operands()) {
130 if (Op == Current && propagatesPoisonFromRecipeOp(R)) {
131 Worklist.push_back(R->getVPSingleValue());
132 break;
133 }
134 }
135 }
136 }
137
138 return false;
139}
140
143 const Loop *L) {
144 ScalarEvolution &SE = *PSE.getSE();
146 Value *LiveIn = V->getUnderlyingValue();
147 if (LiveIn && SE.isSCEVable(LiveIn->getType()))
148 return SE.getSCEV(LiveIn);
149 return SE.getCouldNotCompute();
150 }
151
152 // Helper to create SCEVs for binary and unary operations.
153 auto CreateSCEV = [&](ArrayRef<VPValue *> Ops,
154 function_ref<const SCEV *(ArrayRef<SCEVUse>)> CreateFn)
155 -> const SCEV * {
157 for (VPValue *Op : Ops) {
158 const SCEV *S = getSCEVExprForVPValue(Op, PSE, L);
160 return SE.getCouldNotCompute();
161 SCEVOps.push_back(S);
162 }
163 return CreateFn(SCEVOps);
164 };
165
166 VPValue *LHSVal, *RHSVal;
167 if (match(V, m_Add(m_VPValue(LHSVal), m_VPValue(RHSVal))))
168 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
169 return SE.getAddExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
170 });
171 if (match(V, m_Sub(m_VPValue(LHSVal), m_VPValue(RHSVal))))
172 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
173 return SE.getMinusSCEV(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
174 });
175 if (match(V, m_Not(m_VPValue(LHSVal)))) {
176 // not X = xor X, -1 = -1 - X
177 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
178 return SE.getMinusSCEV(SE.getMinusOne(Ops[0]->getType()), Ops[0]);
179 });
180 }
181 if (match(V, m_Mul(m_VPValue(LHSVal), m_VPValue(RHSVal))))
182 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
183 return SE.getMulExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
184 });
185 if (match(V,
187 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
188 return SE.getUDivExpr(Ops[0], Ops[1]);
189 });
190 // Handle AND with constant mask: x & (2^n - 1) can be represented as x % 2^n.
191 const APInt *Mask;
192 if (match(V, m_c_BinaryAnd(m_VPValue(LHSVal), m_APInt(Mask))) &&
193 (*Mask + 1).isPowerOf2())
194 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
195 return SE.getURemExpr(Ops[0], SE.getConstant(*Mask + 1));
196 });
197 if (match(V, m_Trunc(m_VPValue(LHSVal)))) {
198 const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
199 Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
200 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
201 return SE.getTruncateExpr(Ops[0], DestTy);
202 });
203 }
204 if (match(V, m_ZExt(m_VPValue(LHSVal)))) {
205 const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
206 Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
207 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
208 return SE.getZeroExtendExpr(Ops[0], DestTy);
209 });
210 }
211 if (match(V, m_SExt(m_VPValue(LHSVal)))) {
212 const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
213 Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
214
215 // Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension
216 // onto the operands before computing the subtraction.
217 VPValue *SubLHS, *SubRHS;
218 auto *SubR = dyn_cast<VPRecipeWithIRFlags>(LHSVal);
219 if (match(LHSVal, m_Sub(m_VPValue(SubLHS), m_VPValue(SubRHS))) && SubR &&
220 SubR->hasNoSignedWrap() && poisonGuaranteesUB(LHSVal)) {
221 const SCEV *V1 = getSCEVExprForVPValue(SubLHS, PSE, L);
222 const SCEV *V2 = getSCEVExprForVPValue(SubRHS, PSE, L);
224 return SE.getMinusSCEV(SE.getSignExtendExpr(V1, DestTy),
225 SE.getSignExtendExpr(V2, DestTy), SCEV::FlagNSW);
226 }
227
228 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
229 return SE.getSignExtendExpr(Ops[0], DestTy);
230 });
231 }
232 if (match(V,
234 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
235 return SE.getUMaxExpr(Ops[0], Ops[1]);
236 });
237 if (match(V,
239 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
240 return SE.getSMaxExpr(Ops[0], Ops[1]);
241 });
242 if (match(V,
244 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
245 return SE.getUMinExpr(Ops[0], Ops[1]);
246 });
247 if (match(V,
249 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
250 return SE.getSMinExpr(Ops[0], Ops[1]);
251 });
252
254 Type *SourceElementType;
255 if (match(V, m_GetElementPtr(SourceElementType, Ops))) {
256 const SCEV *GEPExpr = CreateSCEV(Ops, [&](ArrayRef<SCEVUse> Ops) {
257 return SE.getGEPExpr(Ops.front(), Ops.drop_front(), SourceElementType);
258 });
259 return PSE.getPredicatedSCEV(GEPExpr);
260 }
261
262 // TODO: Support constructing SCEVs for more recipes as needed.
263 const VPRecipeBase *DefR = V->getDefiningRecipe();
264 const SCEV *Expr =
266 .Case([](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
267 .Case([&SE, &PSE, L](const VPCanonicalIVPHIRecipe *R) {
268 if (!L)
269 return SE.getCouldNotCompute();
270 const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
271 return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L,
273 })
274 .Case([&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) {
275 const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
276 if (!L || isa<SCEVCouldNotCompute>(Step))
277 return SE.getCouldNotCompute();
278 const SCEV *Start =
279 getSCEVExprForVPValue(R->getStartValue(), PSE, L);
280 const SCEV *AddRec =
281 SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
282 if (R->getTruncInst())
283 return SE.getTruncateExpr(AddRec, R->getScalarType());
284 return AddRec;
285 })
286 .Case([&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) {
287 const SCEV *Start =
288 getSCEVExprForVPValue(R->getStartValue(), PSE, L);
289 if (!L || isa<SCEVCouldNotCompute>(Start))
290 return SE.getCouldNotCompute();
291 const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
292 if (isa<SCEVCouldNotCompute>(Step))
293 return SE.getCouldNotCompute();
294 return SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
295 })
296 .Case([&SE, &PSE, L](const VPDerivedIVRecipe *R) {
297 const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
298 const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
299 const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), PSE, L);
300 if (any_of(ArrayRef({Start, IV, Scale}),
302 return SE.getCouldNotCompute();
303
304 return SE.getAddExpr(
305 SE.getTruncateOrSignExtend(Start, IV->getType()),
306 SE.getMulExpr(
307 IV, SE.getTruncateOrSignExtend(Scale, IV->getType())));
308 })
309 .Case([&SE, &PSE, L](const VPScalarIVStepsRecipe *R) {
310 const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
311 const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
313 return SE.getCouldNotCompute();
314 return SE.getTruncateOrSignExtend(IV, Step->getType());
315 })
316 .Default(
317 [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
318
319 return PSE.getPredicatedSCEV(Expr);
320}
321
323 const Loop *L) {
324 // If address is an SCEVAddExpr, we require that all operands must be either
325 // be invariant or a (possibly sign-extend) affine AddRec.
326 if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Addr)) {
327 return all_of(PtrAdd->operands(), [&SE, L](const SCEV *Op) {
328 return SE.isLoopInvariant(Op, L) ||
329 match(Op, m_scev_SExt(m_scev_AffineAddRec(m_SCEV(), m_SCEV()))) ||
330 match(Op, m_scev_AffineAddRec(m_SCEV(), m_SCEV()));
331 });
332 }
333
334 // Otherwise, check if address is loop invariant or an affine add recurrence.
335 return SE.isLoopInvariant(Addr, L) ||
337}
338
339/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are
340/// uniform, the result will also be uniform.
341static bool preservesUniformity(unsigned Opcode) {
342 if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
343 return true;
344 switch (Opcode) {
345 case Instruction::Freeze:
346 case Instruction::GetElementPtr:
347 case Instruction::ICmp:
348 case Instruction::FCmp:
349 case Instruction::Select:
354 return true;
355 default:
356 return false;
357 }
358}
359
361 // A live-in must be uniform across the scope of VPlan.
363 return true;
364
365 if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
366 const VPRegionBlock *RegionOfR = Rep->getRegion();
367 // Don't consider recipes in replicate regions as uniform yet; their first
368 // lane cannot be accessed when executing the replicate region for other
369 // lanes.
370 if (RegionOfR && RegionOfR->isReplicator())
371 return false;
372 return Rep->isSingleScalar() || (preservesUniformity(Rep->getOpcode()) &&
373 all_of(Rep->operands(), isSingleScalar));
374 }
377 if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
378 return preservesUniformity(WidenR->getOpcode()) &&
379 all_of(WidenR->operands(), isSingleScalar);
380 }
381 if (auto *VPI = dyn_cast<VPInstruction>(VPV))
382 return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
383 (preservesUniformity(VPI->getOpcode()) &&
384 all_of(VPI->operands(), isSingleScalar));
385 if (auto *RR = dyn_cast<VPReductionRecipe>(VPV))
386 return !RR->isPartialReduction();
389 return true;
390 if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
391 return Expr->isSingleScalar();
392
393 // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
394 return isa<VPExpandSCEVRecipe>(VPV);
395}
396
398 // Live-ins are uniform.
400 return true;
401
402 VPRecipeBase *R = V->getDefiningRecipe();
403 VPBasicBlock *VPBB = R ? R->getParent() : nullptr;
404 VPlan *Plan = VPBB ? VPBB->getPlan() : nullptr;
405 if (VPBB &&
406 (VPBB == Plan->getVectorPreheader() || VPBB == Plan->getEntry())) {
407 if (match(V->getDefiningRecipe(),
409 return false;
410 return all_of(R->operands(), isUniformAcrossVFsAndUFs);
411 }
412
413 if (VPRegionBlock *EnclosingRegion = VPBB->getEnclosingLoopRegion()) {
414 // Canonical IV is uniform.
415 if (V == EnclosingRegion->getCanonicalIV())
416 return true;
417 }
418
420 .Case([](const VPDerivedIVRecipe *R) { return true; })
421 .Case([](const VPReplicateRecipe *R) {
422 // Be conservative about side-effects, except for the
423 // known-side-effecting assumes and stores, which we know will be
424 // uniform.
425 return R->isSingleScalar() &&
426 (!R->mayHaveSideEffects() ||
427 isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
428 all_of(R->operands(), isUniformAcrossVFsAndUFs);
429 })
430 .Case([](const VPWidenRecipe *R) {
431 return preservesUniformity(R->getOpcode()) &&
432 all_of(R->operands(), isUniformAcrossVFsAndUFs);
433 })
434 .Case([](const VPInstruction *VPI) {
435 return (VPI->isScalarCast() &&
439 })
440 .Case([](const VPWidenCastRecipe *R) {
441 // A cast is uniform according to its operand.
442 return isUniformAcrossVFsAndUFs(R->getOperand(0));
443 })
444 .Default([](const VPRecipeBase *) { // A value is considered non-uniform
445 // unless proven otherwise.
446 return false;
447 });
448}
449
451 auto DepthFirst = vp_depth_first_shallow(Plan.getEntry());
452 auto I = find_if(DepthFirst, [&VPDT](VPBlockBase *VPB) {
453 return VPBlockUtils::isHeader(VPB, VPDT);
454 });
455 return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
456}
457
459 if (!R)
460 return 1;
461 if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
462 return RR->getVFScaleFactor();
463 if (auto *RR = dyn_cast<VPReductionRecipe>(R))
464 return RR->getVFScaleFactor();
465 if (auto *ER = dyn_cast<VPExpressionRecipe>(R))
466 return ER->getVFScaleFactor();
467 assert(
470 "getting scaling factor of reduction-start-vector not implemented yet");
471 return 1;
472}
473
474std::optional<VPValue *>
478 // Given a VPlan like the following (just including the recipes contributing
479 // to loop control exiting here, not the actual work), we're looking to match
480 // the recipes contributing to the uncountable exit condition comparison
481 // (here, vp<%4>) back to either live-ins or the address nodes for the load
482 // used as part of the uncountable exit comparison so that we can copy them
483 // to a preheader and rotate the address in the loop to the next vector
484 // iteration.
485 //
486 // Currently, the address of the load is restricted to a GEP with 2 operands
487 // and a live-in base address. This constraint may be relaxed later.
488 //
489 // VPlan ' for UF>=1' {
490 // Live-in vp<%0> = VF
491 // Live-in ir<64> = original trip-count
492 //
493 // entry:
494 // Successor(s): preheader, vector.ph
495 //
496 // vector.ph:
497 // Successor(s): vector loop
498 //
499 // <x1> vector loop: {
500 // vector.body:
501 // EMIT vp<%2> = CANONICAL-INDUCTION ir<0>
502 // vp<%3> = SCALAR-STEPS vp<%2>, ir<1>, vp<%0>
503 // CLONE ir<%ee.addr> = getelementptr ir<0>, vp<%3>
504 // WIDEN ir<%ee.load> = load ir<%ee.addr>
505 // WIDEN vp<%4> = icmp eq ir<%ee.load>, ir<0>
506 // EMIT vp<%5> = any-of vp<%4>
507 // EMIT vp<%6> = add vp<%2>, vp<%0>
508 // EMIT vp<%7> = icmp eq vp<%6>, ir<64>
509 // EMIT branch-on-two-conds vp<%5>, vp<%7>
510 // No successors
511 // }
512 // Successor(s): early.exit, middle.block
513 //
514 // middle.block:
515 // Successor(s): preheader
516 //
517 // preheader:
518 // No successors
519 // }
520
521 // Find the uncountable loop exit condition.
522 auto *Region = Plan.getVectorLoopRegion();
523 VPValue *UncountableCondition = nullptr;
524 if (!match(Region->getExitingBasicBlock()->getTerminator(),
525 m_BranchOnTwoConds(m_AnyOf(m_VPValue(UncountableCondition)),
526 m_VPValue())))
527 return std::nullopt;
528
530 Worklist.push_back(UncountableCondition);
531 while (!Worklist.empty()) {
532 VPValue *V = Worklist.pop_back_val();
533
534 // Any value defined outside the loop does not need to be copied.
535 if (V->isDefinedOutsideLoopRegions())
536 continue;
537
538 // FIXME: Remove the single user restriction; it's here because we're
539 // starting with the simplest set of loops we can, and multiple
540 // users means needing to add PHI nodes in the transform.
541 if (V->getNumUsers() > 1)
542 return std::nullopt;
543
544 VPValue *Op1, *Op2;
545 // Walk back through recipes until we find at least one load from memory.
546 if (match(V, m_ICmp(m_VPValue(Op1), m_VPValue(Op2)))) {
547 Worklist.push_back(Op1);
548 Worklist.push_back(Op2);
549 Recipes.push_back(V->getDefiningRecipe());
550 } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
551 // Reject masked loads for the time being; they make the exit condition
552 // more complex.
553 if (Load->isMasked())
554 return std::nullopt;
555
556 VPValue *GEP = Load->getAddr();
558 return std::nullopt;
559
560 Recipes.push_back(Load);
561 Recipes.push_back(GEP->getDefiningRecipe());
562 GEPs.push_back(GEP->getDefiningRecipe());
563 } else
564 return std::nullopt;
565 }
566
567 return UncountableCondition;
568}
569
571 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
572 SmallVector<VPValue *> WideCanonicalIVs;
573 auto *FoundWidenCanonicalIVUser = find_if(
575 assert(count_if(LoopRegion->getCanonicalIV()->users(),
577 "Must have at most one VPWideCanonicalIVRecipe");
578 if (FoundWidenCanonicalIVUser !=
579 LoopRegion->getCanonicalIV()->users().end()) {
580 auto *WideCanonicalIV =
581 cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
582 WideCanonicalIVs.push_back(WideCanonicalIV);
583 }
584
585 // Also include VPWidenIntOrFpInductionRecipes that represent a widened
586 // version of the canonical induction.
587 VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
588 for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
589 auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
590 if (WidenOriginalIV && WidenOriginalIV->isCanonical())
591 WideCanonicalIVs.push_back(WidenOriginalIV);
592 }
593
594 // Walk users of wide canonical IVs and find the single compare of the form
595 // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
596 VPSingleDefRecipe *HeaderMask = nullptr;
597 for (auto *Wide : WideCanonicalIVs) {
598 for (VPUser *U : Wide->users()) {
599 auto *VPI = dyn_cast<VPInstruction>(U);
600 if (!VPI || !vputils::isHeaderMask(VPI, Plan))
601 continue;
602
603 assert(VPI->getOperand(0) == Wide &&
604 "WidenCanonicalIV must be the first operand of the compare");
605 assert(!HeaderMask && "Multiple header masks found?");
606 HeaderMask = VPI;
607 }
608 }
609 return HeaderMask;
610}
611
614 VPBasicBlock *LastBB) {
615 assert(FirstBB->getParent() == LastBB->getParent() &&
616 "FirstBB and LastBB from different regions");
617#ifndef NDEBUG
618 bool InSingleSuccChain = false;
619 for (VPBlockBase *Succ = FirstBB; Succ; Succ = Succ->getSingleSuccessor())
620 InSingleSuccChain |= (Succ == LastBB);
621 assert(InSingleSuccChain &&
622 "LastBB unreachable from FirstBB in single-successor chain");
623#endif
624 auto Blocks = to_vector(
626 auto *LastIt = find(Blocks, LastBB);
627 assert(LastIt != Blocks.end() &&
628 "LastBB unreachable from FirstBB in depth-first traversal");
629 Blocks.erase(std::next(LastIt), Blocks.end());
630 return Blocks;
631}
632
634 const VPDominatorTree &VPDT) {
635 auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
636 if (!VPBB)
637 return false;
638
639 // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
640 // VPBB as its entry, i.e., free of predecessors.
641 if (auto *R = VPBB->getParent())
642 return !R->isReplicator() && !VPBB->hasPredecessors();
643
644 // A header dominates its second predecessor (the latch), with the other
645 // predecessor being the preheader
646 return VPB->getPredecessors().size() == 2 &&
647 VPDT.dominates(VPB, VPB->getPredecessors()[1]);
648}
649
651 const VPDominatorTree &VPDT) {
652 // A latch has a header as its second successor, with its other successor
653 // leaving the loop. A preheader OTOH has a header as its first (and only)
654 // successor.
655 return VPB->getNumSuccessors() == 2 &&
656 VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
657}
658
659std::optional<MemoryLocation>
661 auto *M = dyn_cast<VPIRMetadata>(&R);
662 if (!M)
663 return std::nullopt;
665 // Populate noalias metadata from VPIRMetadata.
666 if (MDNode *NoAliasMD = M->getMetadata(LLVMContext::MD_noalias))
667 Loc.AATags.NoAlias = NoAliasMD;
668 if (MDNode *AliasScopeMD = M->getMetadata(LLVMContext::MD_alias_scope))
669 Loc.AATags.Scope = AliasScopeMD;
670 return Loc;
671}
672
673/// Find the ComputeReductionResult recipe for \p PhiR, looking through selects
674/// inserted for predicated reductions or tail folding.
676 VPValue *BackedgeVal = PhiR->getBackedgeValue();
678 BackedgeVal))
679 return Res;
680
681 // Look through selects inserted for tail folding or predicated reductions.
683 BackedgeVal, m_Select(m_VPValue(), m_VPValue(), m_VPValue()));
684 if (!SelR)
685 return nullptr;
688}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Hexagon Common GEP
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
This file provides utility analysis objects describing memory locations.
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R)
Returns true if R propagates poison from any operand to its result.
static bool preservesUniformity(unsigned Opcode)
Returns true if Opcode preserves uniformity, i.e., if all operands are uniform, the result will also ...
static bool poisonGuaranteesUB(const VPValue *V)
Returns true if V being poison is guaranteed to trigger UB because it propagates to the address of a ...
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
bool isCast() const
bool isBinaryOp() const
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Representation for a specific memory location.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getPredicatedSCEV(const SCEV *Expr)
Returns the rewritten SCEV for Expr in the context of the current SCEV predicate.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getURemExpr(SCEVUse LHS, SCEVUse RHS)
Represents an unsigned remainder expression based on unsigned division.
LLVM_ABI const SCEV * getSMinExpr(SCEVUse LHS, SCEVUse RHS)
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getAddRecExpr(SCEVUse Start, SCEVUse Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getUMaxExpr(SCEVUse LHS, SCEVUse RHS)
const SCEV * getMinusOne(Type *Ty)
Return a SCEV for the constant -1 of a specific type.
LLVM_ABI const SCEV * getCouldNotCompute()
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI const SCEV * getSMaxExpr(SCEVUse LHS, SCEVUse RHS)
LLVM_ABI const SCEV * getGEPExpr(GEPOperator *GEP, ArrayRef< SCEVUse > IndexExprs)
Returns an expression for a GEP.
LLVM_ABI const SCEV * getUMinExpr(SCEVUse LHS, SCEVUse RHS, bool Sequential=false)
LLVM_ABI const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
Definition TypeSwitch.h:89
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
Definition TypeSwitch.h:98
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4269
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4344
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4357
VPRegionBlock * getEnclosingLoopRegion()
Definition VPlan.cpp:598
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:98
VPRegionBlock * getParent()
Definition VPlan.h:190
size_t getNumSuccessors() const
Definition VPlan.h:241
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:226
VPlan * getPlan()
Definition VPlan.cpp:177
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:182
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:231
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:215
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:273
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-sucessor chain.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3831
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:4001
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
Recipe to expand a SCEV expression.
Definition VPlan.h:3793
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2348
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1225
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1321
unsigned getOpcode() const
Definition VPlan.h:1405
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:406
bool isScalarCast() const
Return true if the recipe is a scalar cast.
A recipe for handling reduction phis.
Definition VPlan.h:2700
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4457
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4525
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4555
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3217
unsigned getOpcode() const
Definition VPlan.h:3287
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:4073
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:607
An analysis for type-inference for VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:296
operand_range operands()
Definition VPlanValue.h:364
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:335
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:127
user_range users()
Definition VPlanValue.h:149
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
Definition VPlan.h:2154
A recipe to compute the pointers for widened memory accesses of SourceElementTy.
Definition VPlan.h:2227
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1840
A recipe for handling GEP instructions.
Definition VPlan.h:2090
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition VPlan.h:2454
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1784
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4587
VPBasicBlock * getEntry()
Definition VPlan.h:4679
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4737
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4763
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4843
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1058
bool hasScalarVFOnly() const
Definition VPlan.h:4811
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4684
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4769
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
An efficient, type-erasing, non-owning reference to a callable.
IteratorT end() const
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
auto m_ScalarIVSteps(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
bool isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE, const Loop *L)
Returns true if Addr is an address SCEV that can be passed to TTI::getAddressComputationCost,...
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
Definition VPlanUtils.h:132
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
unsigned getVFScaleFactor(VPRecipeBase *R)
Get the VF scaling factor applied to the recipe's output, if the recipe has one.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
LLVM_ABI_FOR_TEST std::optional< VPValue * > getRecipesForUncountableExit(VPlan &Plan, SmallVectorImpl< VPRecipeBase * > &Recipes, SmallVectorImpl< VPRecipeBase * > &GEPs)
Returns the VPValue representing the uncountable exit comparison used by AnyOf if the recipes it depe...
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:253
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
Definition VPlanCFG.h:280
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866