1//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements transforms for initial VPlan construction.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanCFG.h"
18#include "VPlanDominatorTree.h"
19#include "VPlanHelpers.h"
20#include "VPlanPatternMatch.h"
21#include "VPlanTransforms.h"
22#include "VPlanUtils.h"
23#include "llvm/Analysis/Loads.h"
30#include "llvm/IR/InstrTypes.h"
31#include "llvm/IR/MDBuilder.h"
32#include "llvm/Support/Debug.h"
35
36#define DEBUG_TYPE "vplan"
37
38using namespace llvm;
39using namespace VPlanPatternMatch;
40
41namespace {
42// Class that is used to build the plain CFG for the incoming IR.
43class PlainCFGBuilder {
44 // The outermost loop of the input loop nest considered for vectorization.
45 Loop *TheLoop;
46
47 // Loop Info analysis.
48 LoopInfo *LI;
49
50 // Loop versioning for alias metadata.
51 LoopVersioning *LVer;
52
53 // Vectorization plan that we are working on.
54 std::unique_ptr<VPlan> Plan;
55
56 // Builder of the VPlan instruction-level representation.
57 VPBuilder VPIRBuilder;
58
59 // NOTE: The following maps are intentionally destroyed after the plain CFG
60 // construction because subsequent VPlan-to-VPlan transformation may
61 // invalidate them.
62 // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
63 DenseMap<BasicBlock *, VPBasicBlock *> BB2VPBB;
64 // Map incoming Value definitions to their newly-created VPValues.
65 DenseMap<Value *, VPValue *> IRDef2VPValue;
66
67 // Hold phi nodes that need to be fixed once the plain CFG has been built.
68 SmallVector<PHINode *, 8> PhisToFix;
69
70 // Utility functions.
71 void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
72 void fixHeaderPhis();
73 VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
74#ifndef NDEBUG
75 bool isExternalDef(Value *Val);
76#endif
77 VPValue *getOrCreateVPOperand(Value *IRVal);
78 void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
79
80public:
81 PlainCFGBuilder(Loop *Lp, LoopInfo *LI, LoopVersioning *LVer)
82 : TheLoop(Lp), LI(LI), LVer(LVer), Plan(std::make_unique<VPlan>(Lp)) {}
83
84 /// Build plain CFG for TheLoop and connect it to Plan's entry.
85 std::unique_ptr<VPlan> buildPlainCFG();
86};
87} // anonymous namespace
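// Rough illustration (not a verbatim test case): for a simple input loop
//
//   preheader:
//     br label %header
//   header:
//     %iv = phi i64 [ 0, %preheader ], [ %iv.next, %header ]
//     ...
//     %iv.next = add i64 %iv, 1
//     %ec = icmp eq i64 %iv.next, %n
//     br i1 %ec, label %exit, label %header
//
// buildPlainCFG creates one VPBasicBlock per loop block, mirrors the IR
// predecessor/successor edges, models %iv as a VPPhi whose operands are
// filled in later by fixHeaderPhis, and represents the conditional branch
// with a BranchOnCond VPInstruction on the translated %ec.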
88
89// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
90// must have no predecessors.
91void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
92 // Collect VPBB predecessors.
93 SmallVector<VPBlockBase *, 2> VPBBPreds;
94 for (BasicBlock *Pred : predecessors(BB))
95 VPBBPreds.push_back(getOrCreateVPBB(Pred));
96 VPBB->setPredecessors(VPBBPreds);
97}
98
99static bool isHeaderBB(BasicBlock *BB, Loop *L) {
100 return L && BB == L->getHeader();
101}
102
103// Add operands to VPInstructions representing phi nodes from the input IR.
104void PlainCFGBuilder::fixHeaderPhis() {
105 for (auto *Phi : PhisToFix) {
106 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
107 VPValue *VPVal = IRDef2VPValue[Phi];
108 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
109 auto *PhiR = cast<VPPhi>(VPVal);
110 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
111 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
112 "Expected Phi in header block.");
113 assert(Phi->getNumOperands() == 2 &&
114 "header phi must have exactly 2 operands");
115 for (BasicBlock *Pred : predecessors(Phi->getParent()))
116 PhiR->addOperand(
117 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
118 }
119}
120
121// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
122// existing one if it was already created.
123VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
124 if (auto *VPBB = BB2VPBB.lookup(BB)) {
125 // Retrieve existing VPBB.
126 return VPBB;
127 }
128
129 // Create new VPBB.
130 StringRef Name = BB->getName();
131 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
132 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
133 BB2VPBB[BB] = VPBB;
134 return VPBB;
135}
136
137#ifndef NDEBUG
138// Return true if \p Val is considered an external definition. An external
139// definition is either:
140// 1. A Value that is not an Instruction. This will be refined in the future.
141// 2. An Instruction that is outside of the IR region represented in VPlan,
142// i.e., is not part of the loop nest.
143bool PlainCFGBuilder::isExternalDef(Value *Val) {
144 // All the Values that are not Instructions are considered external
145 // definitions for now.
146 Instruction *Inst = dyn_cast<Instruction>(Val);
147 if (!Inst)
148 return true;
149
150 // Check whether Instruction definition is in loop body.
151 return !TheLoop->contains(Inst);
152}
153#endif
154
155// Create a new VPValue or retrieve an existing one for the Instruction's
156// operand \p IRVal. This function must only be used to create/retrieve VPValues
157 // for *Instruction's operands* and not to create regular VPInstructions. For
158 // the latter, see 'createVPInstructionsForVPBB'.
159VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
160 auto VPValIt = IRDef2VPValue.find(IRVal);
161 if (VPValIt != IRDef2VPValue.end())
162 // Operand has an associated VPInstruction or VPValue that was previously
163 // created.
164 return VPValIt->second;
165
166 // Operand doesn't have a previously created VPInstruction/VPValue. This
167 // means that operand is:
168 // A) a definition external to VPlan,
169 // B) any other Value without specific representation in VPlan.
170 // For now, we use VPValue to represent A and B and classify both as external
171 // definitions. We may introduce specific VPValue subclasses for them in the
172 // future.
173 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
174
175 // A and B: Create VPValue and add it to the pool of external definitions and
176 // to the Value->VPValue map.
177 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
178 IRDef2VPValue[IRVal] = NewVPVal;
179 return NewVPVal;
180}
181
182// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
183// counterpart. This function must be invoked in RPO so that the operands of a
184// VPInstruction in \p BB have been visited before (except for Phi nodes).
185void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
186 BasicBlock *BB) {
187 VPIRBuilder.setInsertPoint(VPBB);
188 // TODO: Model and preserve debug intrinsics in VPlan.
189 for (Instruction &InstRef : *BB) {
190 Instruction *Inst = &InstRef;
191
192 // There shouldn't be any VPValue for Inst at this point. Otherwise, we
193 // visited Inst when we shouldn't, breaking the RPO traversal order.
194 assert(!IRDef2VPValue.count(Inst) &&
195 "Instruction shouldn't have been visited.");
196
197 if (isa<UncondBrInst>(Inst))
198 // Skip the rest of the Instruction processing for Branch instructions.
199 continue;
200
201 if (auto *Br = dyn_cast<CondBrInst>(Inst)) {
202 // Conditional branch instructions are represented using BranchOnCond
203 // recipes.
204 VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
205 VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {},
206 VPIRMetadata(*Inst), Inst->getDebugLoc());
207 continue;
208 }
209
210 if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
211 // Don't emit recipes for unconditional switch instructions.
212 if (SI->getNumCases() == 0)
213 continue;
214 SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
215 for (auto Case : SI->cases())
216 Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
217 VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {},
218 VPIRMetadata(*Inst), Inst->getDebugLoc());
219 continue;
220 }
221
222 VPSingleDefRecipe *NewR;
223 if (auto *Phi = dyn_cast<PHINode>(Inst)) {
224 // Phi node's operands may not have been visited at this point. We create
225 // an empty VPInstruction that we will fix once the whole plain CFG has
226 // been built.
227 NewR =
228 VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi", *Phi);
229 NewR->setUnderlyingValue(Phi);
230 if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
231 // Header phis need to be fixed after the VPBB for the latch has been
232 // created.
233 PhisToFix.push_back(Phi);
234 } else {
235 // Add operands for VPPhi in the order matching its predecessors in
236 // VPlan.
237 DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
238 for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
239 VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
240 getOrCreateVPOperand(Phi->getIncomingValue(I));
241 }
242 for (VPBlockBase *Pred : VPBB->getPredecessors())
243 NewR->addOperand(
244 VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
245 }
246 } else {
247 // Build VPIRMetadata from the instruction and add loop versioning
248 // metadata for loads and stores.
249 VPIRMetadata MD(*Inst);
250 if (isa<LoadInst, StoreInst>(Inst) && LVer) {
251 const auto &[AliasScopeMD, NoAliasMD] =
252 LVer->getNoAliasMetadataFor(Inst);
253 if (AliasScopeMD)
254 MD.setMetadata(LLVMContext::MD_alias_scope, AliasScopeMD);
255 if (NoAliasMD)
256 MD.setMetadata(LLVMContext::MD_noalias, NoAliasMD);
257 }
258
259 // Translate LLVM-IR operands into VPValue operands and set them in the
260 // new VPInstruction.
261 SmallVector<VPValue *, 4> VPOperands;
262 for (Value *Op : Inst->operands())
263 VPOperands.push_back(getOrCreateVPOperand(Op));
264
265 if (auto *CI = dyn_cast<CastInst>(Inst)) {
266 NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
267 CI->getType(), CI->getDebugLoc(),
268 VPIRFlags(*CI), MD);
269 NewR->setUnderlyingValue(CI);
270 } else if (auto *LI = dyn_cast<LoadInst>(Inst)) {
271 NewR = VPIRBuilder.createScalarLoad(LI->getType(), VPOperands[0],
272 LI->getDebugLoc(), MD);
273 NewR->setUnderlyingValue(LI);
274 } else {
275 // Build VPInstruction for any arbitrary Instruction without specific
276 // representation in VPlan.
277 NewR =
278 VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst,
279 VPIRFlags(*Inst), MD, Inst->getDebugLoc());
280 }
281 }
282
283 IRDef2VPValue[Inst] = NewR;
284 }
285}
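// To summarize the mapping above: IR phis become VPPhis (header phis are
// completed later by fixHeaderPhis), conditional branches become BranchOnCond,
// switches with cases become Instruction::Switch VPInstructions, casts and
// loads become scalar cast/load recipes, and every other instruction becomes a
// generic VPInstruction with its operands translated via getOrCreateVPOperand.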
286
287// Main interface to build the plain CFG.
288std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
289 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
290 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
291 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
292 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
293
294 // 1. Scan the body of the loop in a topological order to visit each basic
295 // block after having visited its predecessor basic blocks. Create a VPBB for
296 // each BB and link it to its successor and predecessor VPBBs. Note that
297 // predecessors must be set in the same order as they are in the incoming IR.
298 // Otherwise, there might be problems with existing phi nodes and algorithms
299 // based on predecessor traversal.
300
301 // Loop PH needs to be explicitly visited since it's not taken into account by
302 // LoopBlocksDFS.
303 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
304 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
305 "Unexpected loop preheader");
306 for (auto &I : *ThePreheaderBB) {
307 if (I.getType()->isVoidTy())
308 continue;
309 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
310 }
311
312 LoopBlocksRPO RPO(TheLoop);
313 RPO.perform(LI);
314
315 for (BasicBlock *BB : RPO) {
316 // Create or retrieve the VPBasicBlock for this BB.
317 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
318 // Set VPBB predecessors in the same order as they are in the incoming BB.
319 setVPBBPredsFromBB(VPBB, BB);
320
321 // Create VPInstructions for BB.
322 createVPInstructionsForVPBB(VPBB, BB);
323
324 // Set VPBB successors. We create empty VPBBs for successors if they don't
325 // exist already. Recipes will be created when the successor is visited
326 // during the RPO traversal.
327 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
328 SmallVector<VPBlockBase *> Succs = {
329 getOrCreateVPBB(SI->getDefaultDest())};
330 for (auto Case : SI->cases())
331 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
332 VPBB->setSuccessors(Succs);
333 continue;
334 }
335 if (auto *BI = dyn_cast<UncondBrInst>(BB->getTerminator())) {
336 VPBB->setOneSuccessor(getOrCreateVPBB(BI->getSuccessor()));
337 continue;
338 }
339 auto *BI = cast<CondBrInst>(BB->getTerminator());
340 BasicBlock *IRSucc0 = BI->getSuccessor(0);
341 BasicBlock *IRSucc1 = BI->getSuccessor(1);
342 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
343 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
344 VPBB->setTwoSuccessors(Successor0, Successor1);
345 }
346
347 for (auto *EB : Plan->getExitBlocks())
348 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
349
350 // 2. The whole CFG has been built at this point so all the input Values must
351 // have a VPlan counterpart. Fix VPlan header phis by adding their
352 // corresponding VPlan operands.
353 fixHeaderPhis();
354
355 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
356 Plan->getEntry()->setPlan(&*Plan);
357
358 // Fix VPlan loop-closed-ssa exit phis by adding incoming operands to the
359 // VPIRInstructions wrapping them.
360 // Note that the operand order corresponds to IR predecessor order, and may
361 // need adjusting when VPlan predecessors are added, if an exit block has
362 // multiple predecessors.
363 for (auto *EB : Plan->getExitBlocks()) {
364 for (VPRecipeBase &R : EB->phis()) {
365 auto *PhiR = cast<VPIRPhi>(&R);
366 PHINode &Phi = PhiR->getIRPhi();
367 assert(PhiR->getNumOperands() == 0 &&
368 "no phi operands should be added yet");
369 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
370 PhiR->addOperand(
371 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
372 }
373 }
374
375 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
376 return std::move(Plan);
377}
378
379/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
380/// has exactly 2 predecessors (preheader and latch), where the block
381/// dominates the latch and the preheader dominates the block. If it is a
382/// header block return true and canonicalize the predecessors of the header
383/// (making sure the preheader appears first and the latch second) and the
384/// successors of the latch (making sure the loop exit comes first). Otherwise
385/// return false.
386 static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB,
387 const VPDominatorTree &VPDT) {
388 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
389 if (Preds.size() != 2)
390 return false;
391
392 auto *PreheaderVPBB = Preds[0];
393 auto *LatchVPBB = Preds[1];
394 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
395 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
396 std::swap(PreheaderVPBB, LatchVPBB);
397
398 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
399 !VPDT.dominates(HeaderVPB, LatchVPBB))
400 return false;
401
402 // Canonicalize predecessors of header so that preheader is first and
403 // latch second.
404 HeaderVPB->swapPredecessors();
405 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
406 R.swapOperands();
407 }
408
409 // The two successors of a conditional branch match the condition, with the
410 // first successor corresponding to true and the second to false. We
411 // canonicalize the successors of the latch when introducing the region, such
412 // that the latch exits the region when its condition is true; invert the
413 // original condition if the original CFG branches to the header on true.
414 // Note that the exit edge is not yet connected for top-level loops.
415 if (LatchVPBB->getSingleSuccessor() ||
416 LatchVPBB->getSuccessors()[0] != HeaderVPB)
417 return true;
418
419 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
420 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
421 assert(match(Term,
422 m_BranchOnCond(m_VPValue())) &&
423 "terminator must be a BranchOnCond");
424 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
425 Not->insertBefore(Term);
426 Term->setOperand(0, Not);
427 LatchVPBB->swapSuccessors();
428
429 return true;
430}
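// For example, if the input latch branches back to the header on true
// (br i1 %c, label %header, label %exit), the code above inserts a 'not' of %c
// and swaps the latch's successors, so that after canonicalization the latch
// exits the loop when its condition is true and the header's predecessors are
// ordered [preheader, latch].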
431
432/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
433static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
434 auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
435 auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
436
437 VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
438 VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
439
440 // Create an empty region first and insert it between PreheaderVPBB and
441 // the exit blocks, taking care to preserve the original predecessor &
442 // successor order of blocks. Set region entry and exiting after both
443 // HeaderVPB and LatchVPBB have been disconnected from their
444 // predecessors/successors.
445 auto *R = Plan.createLoopRegion();
446
447 // Transfer latch's successors to the region.
449
450 VPBlockUtils::connectBlocks(PreheaderVPBB, R);
451 R->setEntry(HeaderVPB);
452 R->setExiting(LatchVPBB);
453
454 // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
455 for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
456 VPBB->setParent(R);
457}
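// After this transform, a canonicalized loop looks roughly like
//   preheader -> region { header ... latch } -> (the latch's former successors)
// with the region's entry set to the header, its exiting block set to the
// latch, and all blocks shallowly reachable from the header re-parented into
// the region.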
458
459// Add the necessary canonical IV and branch recipes required to control the
460// loop.
461static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
462 VPBasicBlock *LatchVPBB, Type *IdxTy,
463 DebugLoc DL) {
464 auto *StartV = Plan.getConstantInt(IdxTy, 0);
465
466 // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
467 auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
468 HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
469
470 // We are about to replace the branch to exit the region. Remove the original
471 // BranchOnCond, if there is any.
472 DebugLoc LatchDL = DL;
473 if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) {
474 LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
475 LatchVPBB->getTerminator()->eraseFromParent();
476 }
477
478 VPBuilder Builder(LatchVPBB);
479 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
480 // Initially the induction increment is guaranteed to not wrap, but that may
481 // change later, e.g. when tail-folding, when the flags need to be dropped.
482 auto *CanonicalIVIncrement = Builder.createAdd(
483 CanonicalIVPHI, &Plan.getVFxUF(), DL, "index.next", {true, false});
484 CanonicalIVPHI->addOperand(CanonicalIVIncrement);
485
486 // Add the BranchOnCount VPInstruction to the latch.
487 Builder.createNaryOp(VPInstruction::BranchOnCount,
488 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
489 LatchDL);
490}
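// Sketched informally, the latch now ends with
//   index.next = add nuw canonical-iv, vf-x-uf
//   branch-on-count index.next, vector-trip-count
// while the canonical IV phi (starting at 0) sits at the top of the header.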
491
492/// Creates extracts for values in \p Plan defined in a loop region and used
493/// outside a loop region.
494static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
495 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
496 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
497 if (!is_contained(EB->predecessors(), MiddleVPBB))
498 continue;
499
500 for (VPRecipeBase &R : EB->phis()) {
501 auto *ExitIRI = cast<VPIRPhi>(&R);
502 VPValue *Exiting = ExitIRI->getIncomingValueForBlock(MiddleVPBB);
503 if (isa<VPIRValue>(Exiting))
504 continue;
505 Exiting = B.createNaryOp(VPInstruction::ExtractLastPart, Exiting);
506 Exiting = B.createNaryOp(VPInstruction::ExtractLastLane, Exiting);
507 ExitIRI->setIncomingValueForBlock(MiddleVPBB, Exiting);
508 }
509 }
510}
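// For a value %v defined in a loop region and used by an exit phi reached via
// the middle block, this introduces, roughly,
//   part = extract-last-part %v
//   last = extract-last-lane part
// and rewires the exit phi's incoming value for the middle block to 'last'.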
511
512/// Return an iterator range to iterate over pairs of matching phi nodes in
513/// \p Header and \p ScalarHeader, skipping the canonical IV in the former.
514 static auto getMatchingPhisForScalarLoop(VPBasicBlock *Header,
515 VPBasicBlock *ScalarHeader) {
516 return zip_equal(drop_begin(Header->phis()), ScalarHeader->phis());
517}
518
519static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
520 PredicatedScalarEvolution &PSE, Loop *TheLoop) {
521 VPDominatorTree VPDT(Plan);
522
523 auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
524 canonicalHeaderAndLatch(HeaderVPBB, VPDT);
525 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
526
527 VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
528 VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
529
530 VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
531 // The canonical LatchVPBB has the header block as last successor. If it has
532 // another successor, this successor is an exit block - insert middle block on
533 // its edge. Otherwise, add middle block as another successor retaining header
534 // as last.
535 if (LatchVPBB->getNumSuccessors() == 2) {
536 VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
537 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
538 } else {
539 VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
540 LatchVPBB->swapSuccessors();
541 }
542
543 addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
544
545 // Create SCEV and VPValue for the trip count.
546 // We use the symbolic max backedge-taken-count, which works also when
547 // vectorizing loops with uncountable early exits.
548 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
549 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
550 "Invalid backedge-taken count");
551 ScalarEvolution &SE = *PSE.getSE();
552 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
553 InductionTy, TheLoop);
555
556 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
558
559 // The connection order corresponds to the operands of the conditional branch,
560 // with the middle block already connected to the exit block.
561 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
562 // Also connect the entry block to the scalar preheader.
563 // TODO: Also introduce a branch recipe together with the minimum trip count
564 // check.
565 VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
566 Plan.getEntry()->swapSuccessors();
567
568 createExtractsForLiveOuts(Plan, MiddleVPBB);
569
570 // Create resume phis in the scalar preheader for each phi in the scalar loop.
571 // Their incoming value from the vector loop will be the last lane of the
572 // corresponding vector loop header phi.
573 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
574 VPBuilder ScalarPHBuilder(ScalarPH);
575 assert(equal(ScalarPH->getPredecessors(),
576 ArrayRef<VPBlockBase *>({MiddleVPBB, Plan.getEntry()})) &&
577 "unexpected predecessor order of scalar ph");
578 for (const auto &[PhiR, ScalarPhiR] :
579 getMatchingPhisForScalarLoop(HeaderVPBB, Plan.getScalarHeader())) {
580 auto *VectorPhiR = cast<VPPhi>(&PhiR);
581 VPValue *BackedgeVal = VectorPhiR->getOperand(1);
582 VPValue *ResumeFromVectorLoop =
583 MiddleBuilder.createNaryOp(VPInstruction::ExtractLastPart, BackedgeVal);
584 ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
585 VPInstruction::ExtractLastLane, ResumeFromVectorLoop);
586 // Create scalar resume phi, with the first operand being the incoming value
587 // from the middle block and the second operand coming from the entry block.
588 auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
589 {ResumeFromVectorLoop, VectorPhiR->getOperand(0)},
590 VectorPhiR->getDebugLoc());
591 cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
592 }
593}
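// The skeleton built here, shown schematically, is:
//
//   entry ----------------------> scalar.ph --> scalar loop
//     |                              ^
//   vector.ph                        |
//     |                              |
//   header ... latch                 |
//     |                              |
//   middle.block --------------------+
//     |
//   exit block (if the latch exited the original loop)
//
// Resume phis in scalar.ph merge the value coming out of the vector loop
// (via the middle block) with the original start value (via entry).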
594
595 /// Check \p Plan's live-ins and replace them with constants, if they can be
596 /// simplified via SCEV.
597 static void simplifyLiveInsWithSCEV(VPlan &Plan,
598 PredicatedScalarEvolution &PSE) {
599 auto GetSimplifiedLiveInViaSCEV = [&](VPValue *VPV) -> VPValue * {
600 const SCEV *Expr = vputils::getSCEVExprForVPValue(VPV, PSE);
601 if (auto *C = dyn_cast<SCEVConstant>(Expr))
602 return Plan.getOrAddLiveIn(C->getValue());
603 return nullptr;
604 };
605
606 for (VPValue *LiveIn : to_vector(Plan.getLiveIns())) {
607 if (VPValue *SimplifiedLiveIn = GetSimplifiedLiveInViaSCEV(LiveIn))
608 LiveIn->replaceAllUsesWith(SimplifiedLiveIn);
609 }
610}
611
612/// To make RUN_VPLAN_PASS print initial VPlan.
614
615std::unique_ptr<VPlan>
616VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
617 DebugLoc IVDL, PredicatedScalarEvolution &PSE,
618 LoopVersioning *LVer) {
619 PlainCFGBuilder Builder(TheLoop, &LI, LVer);
620 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
621 addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
622 simplifyLiveInsWithSCEV(*VPlan0, PSE);
623
625 return VPlan0;
626}
627
628/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
629/// for \p Phi based on \p IndDesc.
630 static VPHeaderPHIRecipe *
631 createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPValue *Start,
632 const InductionDescriptor &IndDesc, VPlan &Plan,
633 PredicatedScalarEvolution &PSE, Loop &OrigLoop,
634 DebugLoc DL) {
635 [[maybe_unused]] ScalarEvolution &SE = *PSE.getSE();
636 assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
637 "step must be loop invariant");
638 assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
639 (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
640 SE.getSCEV(IndDesc.getStartValue()) ==
641 vputils::getSCEVExprForVPValue(Start, PSE))) &&
642 "Start VPValue must match IndDesc's start value");
643
644 VPValue *Step =
645 vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
646
647 VPValue *BackedgeVal = PhiR->getOperand(1);
648 // Replace live-out extracts of WideIV's backedge value by ExitingIVValue
649 // recipes. optimizeInductionLiveOutUsers will later compute the proper
650 // DerivedIV.
651 auto ReplaceExtractsWithExitingIVValue = [&](VPHeaderPHIRecipe *WideIV) {
652 for (VPUser *U : to_vector(BackedgeVal->users())) {
653 if (!match(U, m_ExtractLastPart(m_Specific(BackedgeVal))))
654 continue;
655 auto *ExtractLastPart = cast<VPInstruction>(U);
656 VPUser *ExtractLastPartUser = ExtractLastPart->getSingleUser();
657 assert(ExtractLastPartUser && "must have a single user");
658 if (!match(ExtractLastPartUser, m_ExtractLastLane(m_VPValue())))
659 continue;
660 auto *ExtractLastLane = cast<VPInstruction>(ExtractLastPartUser);
661 assert(is_contained(ExtractLastLane->getParent()->successors(),
662 Plan.getScalarPreheader()) &&
663 "last lane must be extracted in the middle block");
664 VPBuilder Builder(ExtractLastLane);
665 ExtractLastLane->replaceAllUsesWith(
666 Builder.createNaryOp(VPInstruction::ExitingIVValue, {WideIV}));
667 ExtractLastLane->eraseFromParent();
668 ExtractLastPart->eraseFromParent();
669 }
670 };
671
672 if (IndDesc.getKind() == InductionDescriptor::IK_PtrInduction) {
673 auto *WideIV = new VPWidenPointerInductionRecipe(
674 Phi, Start, Step, &Plan.getVFxUF(), IndDesc, DL);
675 ReplaceExtractsWithExitingIVValue(WideIV);
676 return WideIV;
677 }
678
679 assert((IndDesc.getKind() == InductionDescriptor::IK_IntInduction ||
680 IndDesc.getKind() == InductionDescriptor::IK_FpInduction) &&
681 "must have an integer or float induction at this point");
682
683 // Update wide induction increments to use the same step as the corresponding
684 // wide induction. This enables detecting induction increments directly in
685 // VPlan and removes redundant splats.
686 if (match(BackedgeVal, m_Add(m_Specific(PhiR), m_VPValue())))
687 BackedgeVal->getDefiningRecipe()->setOperand(1, Step);
688
689 // It is always safe to copy over the NoWrap and FastMath flags. In
690 // particular, when folding tail by masking, the masked-off lanes are never
691 // used, so it is safe.
693
694 auto *WideIV = new VPWidenIntOrFpInductionRecipe(
695 Phi, Start, Step, &Plan.getVF(), IndDesc, Flags, DL);
696
697 ReplaceExtractsWithExitingIVValue(WideIV);
698 return WideIV;
699}
700
702 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
705 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
706 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
707 // Retrieve the header manually from the initial plain-CFG VPlan.
708 VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
709 Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
710 assert(VPDominatorTree(Plan).dominates(HeaderVPBB,
711 HeaderVPBB->getPredecessors()[1]) &&
712 "header must dominate its latch");
713
714 auto CreateHeaderPhiRecipe = [&](VPPhi *PhiR) -> VPHeaderPHIRecipe * {
715 // TODO: Gradually replace uses of underlying instruction by analyses on
716 // VPlan.
717 auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
718 assert(PhiR->getNumOperands() == 2 &&
719 "Must have 2 operands for header phis");
720
721 // Extract common values once.
722 VPIRValue *Start = cast<VPIRValue>(PhiR->getOperand(0));
723 VPValue *BackedgeValue = PhiR->getOperand(1);
724
725 if (FixedOrderRecurrences.contains(Phi)) {
726 // TODO: Currently fixed-order recurrences are modeled as chains of
727 // first-order recurrences. If there are no users of the intermediate
728 // recurrences in the chain, the fixed order recurrence should be
729 // modeled directly, enabling more efficient codegen.
730 return new VPFirstOrderRecurrencePHIRecipe(Phi, *Start, *BackedgeValue);
731 }
732
733 auto InductionIt = Inductions.find(Phi);
734 if (InductionIt != Inductions.end())
735 return createWidenInductionRecipe(Phi, PhiR, Start, InductionIt->second,
736 Plan, PSE, OrigLoop,
737 PhiR->getDebugLoc());
738
739 assert(Reductions.contains(Phi) && "only reductions are expected now");
740 const RecurrenceDescriptor &RdxDesc = Reductions.lookup(Phi);
741 assert(RdxDesc.getRecurrenceStartValue() ==
742 Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()) &&
743 "incoming value must match start value");
744 // Will be updated later to >1 if reduction is partial.
745 unsigned ScaleFactor = 1;
746 bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
747 return new VPReductionPHIRecipe(
748 Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue,
749 getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions,
750 ScaleFactor),
751 Phi->getType()->isFloatingPointTy() ? RdxDesc.getFastMathFlags()
752 : VPIRFlags(),
753 PhiR->getDebugLoc());
754 };
755
757 "first recipe must be canonical IV phi");
758 for (VPRecipeBase &R : make_early_inc_range(drop_begin(HeaderVPBB->phis()))) {
759 auto *PhiR = cast<VPPhi>(&R);
760 VPHeaderPHIRecipe *HeaderPhiR = CreateHeaderPhiRecipe(PhiR);
761 HeaderPhiR->insertBefore(PhiR);
762 PhiR->replaceAllUsesWith(HeaderPhiR);
763 PhiR->eraseFromParent();
764 }
765
766 for (const auto &[HeaderPhiR, ScalarPhiR] :
767 getMatchingPhisForScalarLoop(HeaderVPBB, Plan.getScalarHeader())) {
768 auto *ResumePhiR = cast<VPPhi>(&ScalarPhiR);
769 if (isa<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhiR)) {
770 ResumePhiR->setName("scalar.recur.init");
771 auto *ExtractLastLane = cast<VPInstruction>(ResumePhiR->getOperand(0));
772 ExtractLastLane->setName("vector.recur.extract");
773 continue;
774 }
775 ResumePhiR->setName(isa<VPWidenInductionRecipe>(HeaderPhiR)
776 ? "bc.resume.val"
777 : "bc.merge.rdx");
778 }
779}
780
782 VPlan &Plan, const DenseSet<BasicBlock *> &BlocksNeedingPredication,
783 ElementCount MinVF) {
784 VPTypeAnalysis TypeInfo(Plan);
785 VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
786 SmallVector<VPRecipeBase *> ToDelete;
787
788 for (VPRecipeBase &R : Header->phis()) {
789 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
790 if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
791 continue;
792
793 RecurKind Kind = PhiR->getRecurrenceKind();
797 "AnyOf and Find reductions are not allowed for in-loop reductions");
798
799 bool IsFPRecurrence =
800 RecurrenceDescriptor::isFloatingPointRecurrenceKind(Kind);
801 FastMathFlags FMFs =
802 IsFPRecurrence ? FastMathFlags::getFast() : FastMathFlags();
803
804 // Collect the chain of "link" recipes for the reduction starting at PhiR.
805 SetVector<VPSingleDefRecipe *> Worklist;
806 Worklist.insert(PhiR);
807 for (unsigned I = 0; I != Worklist.size(); ++I) {
808 VPSingleDefRecipe *Cur = Worklist[I];
809 for (VPUser *U : Cur->users()) {
810 auto *UserRecipe = cast<VPSingleDefRecipe>(U);
811 if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
812 assert((UserRecipe->getParent() == Plan.getMiddleBlock() ||
813 UserRecipe->getParent() == Plan.getScalarPreheader()) &&
814 "U must be either in the loop region, the middle block or the "
815 "scalar preheader.");
816 continue;
817 }
818
819 // Stores using instructions will be sunk later.
821 continue;
822 Worklist.insert(UserRecipe);
823 }
824 }
825
826 // Visit operation "Links" along the reduction chain top-down starting from
827 // the phi until LoopExitValue. We keep track of the previous item
828 // (PreviousLink) to tell which of the two operands of a Link will remain
829 // scalar and which will be reduced. For minmax by select(cmp), Link will be
830 // the select instructions. Blend recipes of in-loop reduction phi's will
831 // get folded to their non-phi operand, as the reduction recipe handles the
832 // condition directly.
833 VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
834 for (VPSingleDefRecipe *CurrentLink : drop_begin(Worklist)) {
835 if (auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink)) {
836 assert(Blend->getNumIncomingValues() == 2 &&
837 "Blend must have 2 incoming values");
838 unsigned PhiRIdx = Blend->getIncomingValue(0) == PhiR ? 0 : 1;
839 assert(Blend->getIncomingValue(PhiRIdx) == PhiR &&
840 "PhiR must be an operand of the blend");
841 Blend->replaceAllUsesWith(Blend->getIncomingValue(1 - PhiRIdx));
842 continue;
843 }
844
845 if (IsFPRecurrence) {
846 FastMathFlags CurFMF =
847 cast<VPRecipeWithIRFlags>(CurrentLink)->getFastMathFlags();
848 if (match(CurrentLink, m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
849 CurFMF |= cast<VPRecipeWithIRFlags>(CurrentLink->getOperand(0))
850 ->getFastMathFlags();
851 FMFs &= CurFMF;
852 }
853
854 Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
855
856 // Recognize a call to the llvm.fmuladd intrinsic.
857 bool IsFMulAdd = Kind == RecurKind::FMulAdd;
858 VPValue *VecOp;
859 VPBasicBlock *LinkVPBB = CurrentLink->getParent();
860 if (IsFMulAdd) {
861 assert(RecurrenceDescriptor::isFMulAddIntrinsic(CurrentLinkI) &&
862 "Expected current VPInstruction to be a call to the "
863 "llvm.fmuladd intrinsic");
864 assert(CurrentLink->getOperand(2) == PreviousLink &&
865 "expected a call where the previous link is the added operand");
866
867 // If the instruction is a call to the llvm.fmuladd intrinsic then we
868 // need to create an fmul recipe (multiplying the first two operands of
869 // the fmuladd together) to use as the vector operand for the fadd
870 // reduction.
871 auto *FMulRecipe = new VPInstruction(
872 Instruction::FMul,
873 {CurrentLink->getOperand(0), CurrentLink->getOperand(1)},
874 CurrentLinkI->getFastMathFlags());
875 LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
876 VecOp = FMulRecipe;
877 } else if (Kind == RecurKind::AddChainWithSubs &&
878 match(CurrentLink, m_Sub(m_VPValue(), m_VPValue()))) {
879 Type *PhiTy = TypeInfo.inferScalarType(PhiR);
880 auto *Zero = Plan.getConstantInt(PhiTy, 0);
881 VPBuilder Builder(LinkVPBB, CurrentLink->getIterator());
882 auto *Sub = Builder.createSub(Zero, CurrentLink->getOperand(1),
883 CurrentLinkI->getDebugLoc());
884 Sub->setUnderlyingValue(CurrentLinkI);
885 VecOp = Sub;
886 } else {
887 // Index of the first operand which holds a non-mask vector operand.
888 unsigned IndexOfFirstOperand = 0;
889 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
890 if (match(CurrentLink, m_Cmp(m_VPValue(), m_VPValue())))
891 continue;
892 assert(match(CurrentLink,
893 m_Select(m_VPValue(), m_VPValue(), m_VPValue())) &&
894 "must be a select recipe");
895 IndexOfFirstOperand = 1;
896 }
897 // Note that for non-commutable operands (cmp-selects), the semantics of
898 // the cmp-select are captured in the recurrence kind.
899 unsigned VecOpId =
900 CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink
901 ? IndexOfFirstOperand + 1
902 : IndexOfFirstOperand;
903 VecOp = CurrentLink->getOperand(VecOpId);
904 assert(
905 VecOp != PreviousLink &&
906 CurrentLink->getOperand(
907 cast<VPInstruction>(CurrentLink)->getNumOperandsWithoutMask() -
908 1 - (VecOpId - IndexOfFirstOperand)) == PreviousLink &&
909 "PreviousLink must be the operand other than VecOp");
910 }
911
912 // Get block mask from CurrentLink, if it needs predication.
913 VPValue *CondOp = nullptr;
914 if (BlocksNeedingPredication.contains(CurrentLinkI->getParent()))
915 CondOp = cast<VPInstruction>(CurrentLink)->getMask();
916
917 assert(PhiR->getVFScaleFactor() == 1 &&
918 "inloop reductions must be unscaled");
919 auto *RedRecipe = new VPReductionRecipe(
920 Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
921 getReductionStyle(/*IsInLoop=*/true, PhiR->isOrdered(), 1),
922 CurrentLinkI->getDebugLoc());
923 // Append the recipe to the end of the VPBasicBlock because we need to
924 // ensure that it comes after all of its inputs, including CondOp.
925 // Delete CurrentLink as it will be invalid if its operand is replaced
926 // with a reduction defined at the bottom of the block in the next link.
927 if (LinkVPBB->getNumSuccessors() == 0)
928 RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end())));
929 else
930 LinkVPBB->appendRecipe(RedRecipe);
931
932 CurrentLink->replaceAllUsesWith(RedRecipe);
933 ToDelete.push_back(CurrentLink);
934 PreviousLink = RedRecipe;
935 }
936 }
937
938 for (VPRecipeBase *R : ToDelete)
939 R->eraseFromParent();
940}
941
942/// Check if all loads in the loop are dereferenceable. Iterates over all blocks
943/// reachable from \p HeaderVPBB, skipping \p MiddleVPBB. Returns false if any
944/// non-dereferenceable load is found.
945 static bool areAllLoadsDereferenceable(VPBasicBlock *HeaderVPBB,
946 VPBasicBlock *MiddleVPBB, Loop *TheLoop,
949 ScalarEvolution &SE = *PSE.getSE();
950 const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
951 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
952 vp_depth_first_shallow(HeaderVPBB))) {
953 // Skip blocks outside the loop (exit blocks and their successors).
954 if (VPBB == MiddleVPBB)
955 continue;
956 for (VPRecipeBase &R : *VPBB) {
957 auto *VPI = dyn_cast<VPInstructionWithType>(&R);
958 if (!VPI || VPI->getOpcode() != Instruction::Load)
959 continue;
960
961 // Get the pointer SCEV for dereferenceability checking.
962 VPValue *Ptr = VPI->getOperand(0);
963 const SCEV *PtrSCEV = vputils::getSCEVExprForVPValue(Ptr, PSE, TheLoop);
964 if (isa<SCEVCouldNotCompute>(PtrSCEV)) {
965 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Found non-dereferenceable "
966 "load with SCEVCouldNotCompute pointer\n");
967 return false;
968 }
969
970 // Check dereferenceability using the SCEV-based version.
971 Type *LoadTy = VPI->getResultType();
972 const SCEV *SizeSCEV =
973 SE.getStoreSizeOfExpr(DL.getIndexType(PtrSCEV->getType()), LoadTy);
974 auto *Load = cast<LoadInst>(VPI->getUnderlyingValue());
975 SmallVector<const SCEVPredicate *, 4> Preds;
976 if (isDereferenceableAndAlignedInLoop(PtrSCEV, Load->getAlign(), SizeSCEV,
977 TheLoop, SE, DT, AC, &Preds))
978 continue;
979
980 LLVM_DEBUG(
981 dbgs() << "LV: Not vectorizing: Auto-vectorization of loops with "
982 "potentially faulting load is not supported.\n");
983 return false;
984 }
985 }
986 return true;
987}
988
990 Loop *TheLoop,
993 auto *MiddleVPBB = cast<VPBasicBlock>(
995 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
996 VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
997
998 // TODO: We would like to detect uncountable exits and stores within loops
999 // with such exits from the VPlan alone. Exit detection can be moved
1000 // here from handleUncountableEarlyExits, but we need to improve
1001 // detection of recipes which may write to memory.
1003 if (!areAllLoadsDereferenceable(HeaderVPBB, MiddleVPBB, TheLoop, PSE, DT,
1004 AC))
1005 return false;
1006 // TODO: Check target preference for style.
1007 handleUncountableEarlyExits(Plan, HeaderVPBB, LatchVPBB, MiddleVPBB, Style);
1008 return true;
1009 }
1010
1011 // Disconnect countable early exits from the loop, leaving it with a single
1012 // exit from the latch. Countable early exits are left for a scalar epilog.
1013 for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
1014 for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
1015 if (Pred == MiddleVPBB)
1016 continue;
1017
1018 // Remove phi operands for the early exiting block.
1019 for (VPRecipeBase &R : EB->phis())
1020 cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
1021 auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
1022 EarlyExitingVPBB->getTerminator()->eraseFromParent();
1024 }
1025 }
1026 return true;
1027}
1028
1029void VPlanTransforms::addMiddleCheck(VPlan &Plan, bool TailFolded) {
1030 auto *MiddleVPBB = cast<VPBasicBlock>(
1032 // If MiddleVPBB has a single successor then the original loop does not exit
1033 // via the latch and the single successor must be the scalar preheader.
1034 // There's no need to add a runtime check to MiddleVPBB.
1035 if (MiddleVPBB->getNumSuccessors() == 1) {
1036 assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
1037 "must have ScalarPH as single successor");
1038 return;
1039 }
1040
1041 assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
1042
1043 // Add a check in the middle block to see if we have completed all of the
1044 // iterations in the first vector loop.
1045 //
1046 // Three cases:
1047 // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
1048 // condition to false.
1049 // 2) If (N - N%VF) == N, then we *don't* need to run the
1050 // remainder. Thus if tail is to be folded, we know we don't need to run
1051 // the remainder and we can set the condition to true.
1052 // 3) Otherwise, construct a runtime check.
1053
1054 // We use the same DebugLoc as the scalar loop latch terminator instead of
1055 // the corresponding compare because they may have ended up with different
1056 // line numbers and we want to avoid awkward line stepping while debugging.
1057 // E.g., if the compare has got a line number inside the loop.
1058 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
1059 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
1060 VPBuilder Builder(MiddleVPBB);
1061 VPValue *Cmp;
1062 if (TailFolded)
1063 Cmp = Plan.getTrue();
1064 else
1065 Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
1066 &Plan.getVectorTripCount(), LatchDL, "cmp.n");
1067 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
1068}
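// When a runtime check is needed (case 3 above), the middle block roughly ends
// with
//   cmp.n = icmp eq trip-count, vector-trip-count
//   branch-on-cond cmp.n
// taking the exit edge when all iterations ran in the vector loop and falling
// through to the scalar preheader otherwise.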
1069
1070 void VPlanTransforms::createLoopRegions(VPlan &Plan) {
1071 VPDominatorTree VPDT(Plan);
1072 for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
1073 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
1074 createLoopRegion(Plan, HeaderVPB);
1075
1076 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
1077 TopRegion->setName("vector loop");
1078 TopRegion->getEntryBasicBlock()->setName("vector.body");
1079}
1080
1082 assert(Plan.getExitBlocks().size() == 1 &&
1083 "only a single-exit block is supported currently");
1084 assert(Plan.getExitBlocks().front()->getSinglePredecessor() ==
1085 Plan.getMiddleBlock() &&
1086 "the exit block must have middle block as single predecessor");
1087
1088 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1089 assert(LoopRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
1090 "The vector loop region must have the middle block as its single "
1091 "successor for now");
1092 VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
1093
1094 Header->splitAt(Header->getFirstNonPhi());
1095
1096 // Create the header mask, insert it in the header and branch on it.
1097 auto *IV =
1098 new VPWidenCanonicalIVRecipe(Header->getParent()->getCanonicalIV());
1099 VPBuilder Builder(Header, Header->getFirstNonPhi());
1100 Builder.insert(IV);
1101 VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
1102 VPValue *HeaderMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
1103 Builder.createNaryOp(VPInstruction::BranchOnCond, HeaderMask);
1104
1105 VPBasicBlock *OrigLatch = LoopRegion->getExitingBasicBlock();
1106 VPValue *IVInc;
1107 [[maybe_unused]] bool TermBranchOnCount =
1108 match(OrigLatch->getTerminator(),
1109 m_BranchOnCount(m_VPValue(IVInc),
1110 m_Specific(&Plan.getVectorTripCount())));
1111 assert(TermBranchOnCount &&
1112 match(IVInc, m_Add(m_Specific(LoopRegion->getCanonicalIV()),
1113 m_Specific(&Plan.getVFxUF()))) &&
1114 std::next(IVInc->getDefiningRecipe()->getIterator()) ==
1115 OrigLatch->getTerminator()->getIterator() &&
1116 "Unexpected canonical iv increment");
1117
1118 // Split the latch at the IV update, and branch to it from the header mask.
1119 VPBasicBlock *Latch =
1120 OrigLatch->splitAt(IVInc->getDefiningRecipe()->getIterator());
1121 Latch->setName("vector.latch");
1122 VPBlockUtils::connectBlocks(Header, Latch);
1123
1124 // Collect any values defined in the loop that need a phi. Currently this
1125 // includes header phi backedges and live-outs extracted in the middle block.
1126 // TODO: Handle early exits via Plan.getExitBlocks()
1127 MapVector<VPValue *, SmallVector<VPRecipeBase *>> NeedsPhi;
1128 for (VPRecipeBase &R : Header->phis())
1130 NeedsPhi[cast<VPHeaderPHIRecipe>(R).getBackedgeValue()].push_back(&R);
1131
1132 VPValue *V;
1133 for (VPRecipeBase &R : *Plan.getMiddleBlock())
1134 if (match(&R, m_ExtractLastPart(m_VPValue(V))))
1135 NeedsPhi[V].push_back(&R);
1136
1137 // Insert phis with a poison incoming value for past the end of the tail.
1138 Builder.setInsertPoint(Latch, Latch->begin());
1139 VPTypeAnalysis TypeInfo(Plan);
1140 for (const auto &[V, Users] : NeedsPhi) {
1141 if (isa<VPIRValue>(V))
1142 continue;
1143 // TODO: For reduction phis, use phi value instead of poison so we can
1144 // remove the special casing for tail folding in
1145 // LoopVectorizationPlanner::addReductionResultComputation
1146 VPValue *Poison =
1147 Plan.getOrAddLiveIn(PoisonValue::get(TypeInfo.inferScalarType(V)));
1148 VPInstruction *Phi = Builder.createScalarPhi({V, Poison});
1149 for (VPUser *U : Users)
1150 U->replaceUsesOfWith(V, Phi);
1151 }
1152
1153 // Any extract of the last element must be updated to extract from the last
1154 // active lane of the header mask instead (i.e., the lane corresponding to the
1155 // last active iteration).
1156 Builder.setInsertPoint(Plan.getMiddleBlock()->getTerminator());
1157 for (VPRecipeBase &R : *Plan.getMiddleBlock()) {
1158 VPValue *Op;
1159 if (!match(&R, m_ExtractLastLane(m_VPValue(Op))))
1160 continue;
1161
1162 // Compute the index of the last active lane.
1163 VPValue *LastActiveLane =
1164 Builder.createNaryOp(VPInstruction::LastActiveLane, HeaderMask);
1165 auto *Ext =
1166 Builder.createNaryOp(VPInstruction::ExtractLane, {LastActiveLane, Op});
1167 R.getVPSingleValue()->replaceAllUsesWith(Ext);
1168 }
1169}
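// Schematically, after this transform the header computes
//   header-mask = icmp ule widen-canonical-iv, backedge-taken-count
//   branch-on-cond header-mask
// and the split-off vector.latch keeps the canonical IV increment and the
// branch-on-count exit, with phis inserted in the latch so that values defined
// under the mask remain available (as poison) on the masked-off final
// iteration.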
1170
1171/// Insert \p CheckBlockVPBB on the edge leading to the vector preheader,
1172/// connecting it to both vector and scalar preheaders. Updates scalar
1173/// preheader phis to account for the new predecessor.
1174 static void insertCheckBlockBeforeVectorLoop(VPlan &Plan,
1175 VPBasicBlock *CheckBlockVPBB) {
1176 VPBlockBase *VectorPH = Plan.getVectorPreheader();
1177 auto *ScalarPH = cast<VPBasicBlock>(Plan.getScalarPreheader());
1178 VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
1179 VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
1180 VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
1181 CheckBlockVPBB->swapSuccessors();
1182 unsigned NumPreds = ScalarPH->getNumPredecessors();
1183 for (VPRecipeBase &R : ScalarPH->phis()) {
1184 auto *Phi = cast<VPPhi>(&R);
1185 assert(Phi->getNumIncoming() == NumPreds - 1 &&
1186 "must have incoming values for all predecessors");
1187 Phi->addOperand(Phi->getOperand(NumPreds - 2));
1188 }
1189}
1190
1191 // Likelihood of bypassing the vectorized loop due to a runtime check block,
1192// including memory overlap checks block and wrapping/unit-stride checks block.
1193static constexpr uint32_t CheckBypassWeights[] = {1, 127};
1194
1195/// Create a BranchOnCond terminator in \p CheckBlockVPBB. Optionally adds
1196/// branch weights.
1197static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB,
1198 VPValue *Cond, bool AddBranchWeights) {
1200 auto *Term = VPBuilder(CheckBlockVPBB)
1201 .createNaryOp(VPInstruction::BranchOnCond, {Cond});
1202 if (AddBranchWeights) {
1203 MDBuilder MDB(Plan.getContext());
1204 MDNode *BranchWeights =
1205 MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
1206 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1207 }
1208}
1209
1210 void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
1211 BasicBlock *CheckBlock,
1212 bool AddBranchWeights) {
1213 VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
1214 VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
1215 insertCheckBlockBeforeVectorLoop(Plan, CheckBlockVPBB);
1216 addBypassBranch(Plan, CheckBlockVPBB, CondVPV, AddBranchWeights);
1217}
1218
1219 void VPlanTransforms::addMinimumIterationCheck(
1220 VPlan &Plan, ElementCount VF, unsigned UF,
1221 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
1222 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
1223 PredicatedScalarEvolution &PSE, DebugLoc DL) {
1224 // Generate code to check if the loop's trip count is less than VF * UF, or
1225 // equal to it in case a scalar epilogue is required; this implies that the
1226 // vector trip count is zero. This check also covers the case where adding one
1227 // to the backedge-taken count overflowed leading to an incorrect trip count
1228 // of zero. In this case we will also jump to the scalar loop.
1229 CmpInst::Predicate CmpPred =
1230 RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1231 // If tail is to be folded, vector loop takes care of all iterations.
1232 VPValue *TripCountVPV = Plan.getTripCount();
1233 const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, PSE);
1234 Type *TripCountTy = TripCount->getType();
1235 ScalarEvolution &SE = *PSE.getSE();
1236 auto GetMinTripCount = [&]() -> const SCEV * {
1237 // Compute max(MinProfitableTripCount, UF * VF) and return it.
1238 const SCEV *VFxUF =
1239 SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
1240 if (UF * VF.getKnownMinValue() >=
1241 MinProfitableTripCount.getKnownMinValue()) {
1242 // TODO: SCEV should be able to simplify test.
1243 return VFxUF;
1244 }
1245 const SCEV *MinProfitableTripCountSCEV =
1246 SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
1247 return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
1248 };
1249
1250 VPBasicBlock *EntryVPBB = Plan.getEntry();
1251 VPBuilder Builder(EntryVPBB);
1252 VPValue *TripCountCheck = Plan.getFalse();
1253 const SCEV *Step = GetMinTripCount();
1254 // TripCountCheck = false, folding tail implies positive vector trip
1255 // count.
1256 if (!TailFolded) {
1257 // TODO: Emit unconditional branch to vector preheader instead of
1258 // conditional branch with known condition.
1259 TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
1260 // Check if the trip count is < the step.
1261 if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
1262 // TODO: Ensure step is at most the trip count when determining max VF and
1263 // UF, w/o tail folding.
1264 TripCountCheck = Plan.getTrue();
1265 } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
1266 TripCount, Step)) {
1267 // Generate the minimum iteration check only if we cannot prove the
1268 // check is known to be true, or known to be false.
1269 VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step);
1270 TripCountCheck = Builder.createICmp(
1271 CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
1272 } // else step known to be < trip count, use TripCountCheck preset to false.
1273 }
1274 VPInstruction *Term =
1275 Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
1276 if (MinItersBypassWeights) {
1277 MDBuilder MDB(Plan.getContext());
1278 MDNode *BranchWeights = MDB.createBranchWeights(
1279 ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
1280 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1281 }
1282}
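// For example, with VF = 4, UF = 2 and no larger profitable trip count, the
// emitted check (when it cannot be folded to a constant) is roughly
//   min.iters.check = icmp ult trip-count, 8
//   branch-on-cond min.iters.check
// using ule instead of ult when a scalar epilogue is required.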
1283
1284 void VPlanTransforms::addMinimumVectorEpilogueIterationCheck(
1285 VPlan &Plan, Value *TripCount, Value *VectorTripCount,
1286 bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF,
1287 unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE) {
1288 // Add the minimum iteration check for the epilogue vector loop.
1289 VPValue *TC = Plan.getOrAddLiveIn(TripCount);
1290 VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
1291 VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
1292 TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
1293 VPValue *Count = Builder.createSub(TC, Plan.getOrAddLiveIn(VectorTripCount),
1294 DebugLoc::getUnknown(), "n.vec.remaining");
1295
1296 // Generate code to check if the loop's trip count is less than VF * UF of
1297 // the vector epilogue loop.
1298 auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1299 auto *CheckMinIters = Builder.createICmp(
1300 P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
1301 VPInstruction *Branch =
1302 Builder.createNaryOp(VPInstruction::BranchOnCond, CheckMinIters);
1303
1304 // We assume the remaining `Count` is equally distributed in
1305 // [0, MainLoopStep)
1306 // So the probability for `Count < EpilogueLoopStep` should be
1307 // min(MainLoopStep, EpilogueLoopStep) / MainLoopStep
1308 // TODO: Improve the estimate by taking the estimated trip count into
1309 // consideration.
1310 unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
1311 const uint32_t Weights[] = {EstimatedSkipCount,
1312 MainLoopStep - EstimatedSkipCount};
1313 MDBuilder MDB(Plan.getContext());
1314 MDNode *BranchWeights =
1315 MDB.createBranchWeights(Weights, /*IsExpected=*/false);
1316 Branch->setMetadata(LLVMContext::MD_prof, BranchWeights);
1317}
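// For example, with MainLoopStep = 8 and EpilogueLoopStep = 4, the branch
// weights become {4, 4}, i.e. an estimated 50% chance that fewer than
// EpilogueLoopStep iterations remain and the epilogue vector loop is skipped.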
1318
1319/// Find and return the final select instruction of the FindIV result pattern
1320/// for the given \p BackedgeVal:
1321/// select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),
1322/// ComputeReductionResult(ReducedIV), Start.
1324 return cast<VPInstruction>(
1325 vputils::findRecipe(BackedgeVal, [BackedgeVal](VPRecipeBase *R) {
1326 auto *VPI = dyn_cast<VPInstruction>(R);
1327 return VPI &&
1328 matchFindIVResult(VPI, m_Specific(BackedgeVal), m_VPValue());
1329 }));
1330}
1331
1333 auto GetMinOrMaxCompareValue =
1334 [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
1335 auto *MinOrMaxR =
1336 dyn_cast_or_null<VPRecipeWithIRFlags>(RedPhiR->getBackedgeValue());
1337 if (!MinOrMaxR)
1338 return nullptr;
1339
1340 // Check that MinOrMaxR is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1341 // with an intrinsic that matches the reduction kind.
1342 Intrinsic::ID ExpectedIntrinsicID =
1343 getMinMaxReductionIntrinsicOp(RedPhiR->getRecurrenceKind());
1344 if (!match(MinOrMaxR, m_Intrinsic(ExpectedIntrinsicID)))
1345 return nullptr;
1346
1347 if (MinOrMaxR->getOperand(0) == RedPhiR)
1348 return MinOrMaxR->getOperand(1);
1349
1350 assert(MinOrMaxR->getOperand(1) == RedPhiR &&
1351 "Reduction phi operand expected");
1352 return MinOrMaxR->getOperand(0);
1353 };
1354
1355 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1356 SmallVector<std::pair<VPReductionPHIRecipe *, VPValue *>, 2>
1357 MinOrMaxNumReductionsToHandle;
1358 bool HasUnsupportedPhi = false;
1359 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
1360 if (isa<VPCanonicalIVPHIRecipe>(&R))
1361 continue;
1362 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
1363 if (!Cur) {
1364 // TODO: Also support fixed-order recurrence phis.
1365 HasUnsupportedPhi = true;
1366 continue;
1367 }
1369 Cur->getRecurrenceKind())) {
1370 HasUnsupportedPhi = true;
1371 continue;
1372 }
1373
1374 VPValue *MinOrMaxOp = GetMinOrMaxCompareValue(Cur);
1375 if (!MinOrMaxOp)
1376 return false;
1377
1378 MinOrMaxNumReductionsToHandle.emplace_back(Cur, MinOrMaxOp);
1379 }
1380
1381 if (MinOrMaxNumReductionsToHandle.empty())
1382 return true;
1383
1384 // We won't be able to resume execution in the scalar tail, if there are
1385 // unsupported header phis or there is no scalar tail at all, due to
1386 // tail-folding.
1387 if (HasUnsupportedPhi || !Plan.hasScalarTail())
1388 return false;
1389
1390 /// Check if the vector loop of \p Plan can early exit and restart
1391 /// execution of last vector iteration in the scalar loop. This requires all
1392 /// recipes up to early exit point be side-effect free as they are
1393 /// re-executed. Currently we check that the loop is free of any recipe that
1394 /// may write to memory. Expected to operate on an early VPlan w/o nested
1395 /// regions.
1396 for (VPBlockBase *VPB :
1397 vp_depth_first_shallow(LoopRegion->getEntry())) {
1398 auto *VPBB = cast<VPBasicBlock>(VPB);
1399 for (auto &R : *VPBB) {
1400 if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
1401 return false;
1402 }
1403 }
1404
1405 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1406 VPBuilder LatchBuilder(LatchVPBB->getTerminator());
1407 VPValue *AllNaNLanes = nullptr;
1408 SmallPtrSet<VPValue *, 2> RdxResults;
1409 for (const auto &[_, MinOrMaxOp] : MinOrMaxNumReductionsToHandle) {
1410 VPValue *RedNaNLanes =
1411 LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinOrMaxOp, MinOrMaxOp);
1412 AllNaNLanes = AllNaNLanes ? LatchBuilder.createOr(AllNaNLanes, RedNaNLanes)
1413 : RedNaNLanes;
1414 }
1415
1416 VPValue *AnyNaNLane =
1417 LatchBuilder.createNaryOp(VPInstruction::AnyOf, {AllNaNLanes});
1418 VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
1419 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
1420 for (const auto &[RedPhiR, _] : MinOrMaxNumReductionsToHandle) {
1422 RedPhiR->getRecurrenceKind()) &&
1423 "unsupported reduction");
1424
1425 // If we exit early due to NaNs, compute the final reduction result based on
1426 // the reduction phi at the beginning of the last vector iteration.
1427 auto *RdxResult = vputils::findComputeReductionResult(RedPhiR);
1428 assert(RdxResult && "must find a ComputeReductionResult");
1429
1430 auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
1431 RdxResult->getOperand(0));
1432 RdxResult->setOperand(0, NewSel);
1433 assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
1434 RdxResults.insert(RdxResult);
1435 }
1436
1437 auto *LatchExitingBranch = LatchVPBB->getTerminator();
1438 assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
1439 "Unexpected terminator");
1440 auto *IsLatchExitTaken = LatchBuilder.createICmp(
1441 CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
1442 LatchExitingBranch->getOperand(1));
1443 auto *AnyExitTaken = LatchBuilder.createOr(AnyNaNLane, IsLatchExitTaken);
1444 LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
1445 LatchExitingBranch->eraseFromParent();
1446
1447 // Update resume phis for inductions in the scalar preheader. If AnyNaNLane
1448 // is true, resume from the start of the last vector iteration via the
1449 // canonical IV, otherwise from the original value.
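// Schematically (illustrative only, not VPlan print syntax):
//   resume.val = AnyNaNLane ? <canonical IV at start of last vector iteration>
//                           : <original resume value>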
1450 auto IsTC = [&Plan](VPValue *V) {
1451 return V == &Plan.getVectorTripCount() || V == Plan.getTripCount();
1452 };
1453 for (auto &R : Plan.getScalarPreheader()->phis()) {
1454 auto *ResumeR = cast<VPPhi>(&R);
1455 VPValue *VecV = ResumeR->getOperand(0);
1456 if (RdxResults.contains(VecV))
1457 continue;
1458 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
1459 VPValue *DIVTC = DerivedIV->getOperand(1);
1460 if (DerivedIV->getNumUsers() == 1 && IsTC(DIVTC)) {
1461 auto *NewSel = MiddleBuilder.createSelect(
1462 AnyNaNLane, LoopRegion->getCanonicalIV(), DIVTC);
1463 DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
1464 DerivedIV->setOperand(1, NewSel);
1465 continue;
1466 }
1467 }
1468 // Bail out and abandon the current, partially modified, VPlan if we
1469 // encounter a resume phi that cannot be updated yet.
1470 if (!IsTC(VecV)) {
1471 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
1472 "FMaxNum/FMinNum reduction.\n");
1473 return false;
1474 }
1475 auto *NewSel = MiddleBuilder.createSelect(
1476 AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
1477 ResumeR->setOperand(0, NewSel);
1478 }
1479
1480 auto *MiddleTerm = MiddleVPBB->getTerminator();
1481 MiddleBuilder.setInsertPoint(MiddleTerm);
1482 VPValue *MiddleCond = MiddleTerm->getOperand(0);
1483 VPValue *NewCond =
1484 MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
1485 MiddleTerm->setOperand(0, NewCond);
1486 return true;
1487}
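// Illustrative sketch (hypothetical example, not part of the original source):
// a loop of the shape below yields an FMaxNum reduction phi, since std::fmax
// typically lowers to llvm.maxnum. The transform above keeps such loops
// vectorizable by testing the reduction input for NaN lanes in the latch and,
// if any are found, exiting the vector loop and re-running the last vector
// iteration in the scalar loop.
//
//   #include <cmath>
//   float maxOf(const float *A, int N) {   // assumes N >= 1
//     float Max = A[0];
//     for (int I = 1; I < N; ++I)
//       Max = std::fmax(Max, A[I]);
//     return Max;
//   }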
1488
1489 bool VPlanTransforms::handleFindLastReductions(VPlan &Plan) {
1490 if (Plan.hasScalarVFOnly())
1491 return false;
1492
1493 // We want to create the following nodes:
1494 // vector.body:
1495 // ...new WidenPHI recipe introduced to keep the mask value for the latest
1496 // iteration where any lane was active.
1497 // mask.phi = phi [ ir<false>, vector.ph ], [ vp<new.mask>, vector.body ]
1498 // ...data.phi (a VPReductionPHIRecipe for a FindLast reduction) already
1499 // exists, but needs updating to use 'new.data' for the backedge value.
1500 // data.phi = phi ir<default.val>, vp<new.data>
1501 //
1502 // ...'data' and 'compare' created by existing nodes...
1503 //
1504 // ...new recipes introduced to determine whether to update the reduction
1505 // values or keep the current one.
1506 // any.active = i1 any-of ir<compare>
1507 // new.mask = select vp<any.active>, ir<compare>, vp<mask.phi>
1508 // new.data = select vp<any.active>, ir<data>, ir<data.phi>
1509 //
1510 // middle.block:
1511 // ...extract-last-active replaces compute-reduction-result.
1512 // result = extract-last-active vp<new.data>, vp<new.mask>, ir<default.val>
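// For illustration (hypothetical source, names made up), a FindLast reduction
// typically comes from a conditional assignment such as
//
//   int Last = Default;
//   for (int I = 0; I < N; ++I)
//     if (Pred(A[I]))
//       Last = B[I]; // keep the data from the last iteration where the
//                    // predicate held
//
// which reaches the vectorizer as select(compare, data, data.phi), or an
// equivalent blend, feeding the reduction phi handled below.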
1513
1514 SmallVector<VPReductionPHIRecipe *> Phis;
1515 for (VPRecipeBase &Phi :
1516 Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
1517 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&Phi);
1518 if (PhiR && RecurrenceDescriptor::isFindLastRecurrenceKind(
1519 PhiR->getRecurrenceKind()))
1520 Phis.push_back(PhiR);
1521 }
1522
1523 if (Phis.empty())
1524 return true;
1525
1526 VPValue *HeaderMask = vputils::findHeaderMask(Plan);
1527 for (VPReductionPHIRecipe *PhiR : Phis) {
1528 // Find the condition for the select/blend.
1529 VPValue *BackedgeSelect = PhiR->getBackedgeValue();
1530 VPValue *CondSelect = BackedgeSelect;
1531
1532 // If there's a header mask, the backedge select will not be the find-last
1533 // select.
1534 if (HeaderMask && !match(BackedgeSelect,
1535 m_Select(m_Specific(HeaderMask),
1536 m_VPValue(CondSelect), m_Specific(PhiR))))
1537 llvm_unreachable("expected header mask select");
1538
1539 VPValue *Cond = nullptr, *Op1 = nullptr, *Op2 = nullptr;
1540
1541 // If we're matching a blend rather than a select, there should be exactly
1542 // one incoming value that is the data; all other incoming values should
1543 // be the phi.
1544 auto MatchBlend = [&](VPRecipeBase *R) {
1545 auto *Blend = dyn_cast<VPBlendRecipe>(R);
1546 if (!Blend)
1547 return false;
1548 assert(!Blend->isNormalized() && "must run before blend normalizaion");
1549 unsigned NumIncomingDataValues = 0;
1550 for (unsigned I = 0; I < Blend->getNumIncomingValues(); ++I) {
1551 VPValue *Incoming = Blend->getIncomingValue(I);
1552 if (Incoming != PhiR) {
1553 ++NumIncomingDataValues;
1554 Cond = Blend->getMask(I);
1555 Op1 = Incoming;
1556 Op2 = PhiR;
1557 }
1558 }
1559 return NumIncomingDataValues == 1;
1560 };
1561
1562 VPSingleDefRecipe *SelectR =
1563 cast<VPSingleDefRecipe>(CondSelect->getDefiningRecipe());
1564 if (!match(SelectR,
1565 m_Select(m_VPValue(Cond), m_VPValue(Op1), m_VPValue(Op2))) &&
1566 !MatchBlend(SelectR))
1567 return false;
1568
1569 assert(Cond != HeaderMask && "Cond must not be HeaderMask");
1570
1571 // Find final reduction computation and replace it with an
1572 // extract.last.active intrinsic.
1573 auto *RdxResult =
1575 BackedgeSelect);
1576 assert(RdxResult && "Could not find reduction result");
1577
1578 // Add mask phi.
1579 VPBuilder Builder = VPBuilder::getToInsertAfter(PhiR);
1580 auto *MaskPHI = new VPWidenPHIRecipe(nullptr, /*Start=*/Plan.getFalse());
1581 Builder.insert(MaskPHI);
1582
1583 // Add select for mask.
1584 Builder.setInsertPoint(SelectR);
1585
1586 if (Op1 == PhiR) {
1587 // Normalize to selecting the data operand when the condition is true by
1588 // swapping operands and negating the condition.
1589 std::swap(Op1, Op2);
1590 Cond = Builder.createNot(Cond);
1591 }
1592 assert(Op2 == PhiR && "data value must be selected if Cond is true");
1593
1594 if (HeaderMask)
1595 Cond = Builder.createLogicalAnd(HeaderMask, Cond);
1596
1597 VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, {Cond});
1598 VPValue *MaskSelect = Builder.createSelect(AnyOf, Cond, MaskPHI);
1599 MaskPHI->addOperand(MaskSelect);
1600
1601 // Replace select for data.
1602 VPValue *DataSelect =
1603 Builder.createSelect(AnyOf, Op1, Op2, SelectR->getDebugLoc());
1604 SelectR->replaceAllUsesWith(DataSelect);
1605 PhiR->setBackedgeValue(DataSelect);
1606 SelectR->eraseFromParent();
1607
1608 Builder.setInsertPoint(RdxResult);
1609 auto *ExtractLastActive =
1610 Builder.createNaryOp(VPInstruction::ExtractLastActive,
1611 {PhiR->getStartValue(), DataSelect, MaskSelect},
1612 RdxResult->getDebugLoc());
1613 RdxResult->replaceAllUsesWith(ExtractLastActive);
1614 RdxResult->eraseFromParent();
1615 }
1616
1617 return true;
1618}
1619
1620/// Given a first argmin/argmax pattern with strict predicate consisting of
1621/// 1) a MinOrMax reduction \p MinOrMaxPhiR producing \p MinOrMaxResult,
1622/// 2) a wide induction \p WideIV,
1623/// 3) a FindLastIV reduction \p FindLastIVPhiR using \p WideIV,
1624/// return the smallest index of the FindLastIV reduction result using UMin,
1625/// unless \p MinOrMaxResult equals the start value of its MinOrMax reduction.
1626/// In that case, return the start value of the FindLastIV reduction instead.
1627/// If \p WideIV is not canonical, a new canonical wide IV is added, and the
1628/// final result is scaled back to the non-canonical \p WideIV.
1629/// The final value of the FindLastIV reduction is originally computed using
1630/// \p FindIVSelect, \p FindIVCmp, and \p FindIVRdxResult, which are replaced
1631/// and removed.
1632/// Returns true if the pattern was handled successfully, false otherwise.
1633 static bool handleFirstArgMinOrMax(
1634 VPlan &Plan, VPReductionPHIRecipe *MinOrMaxPhiR,
1635 VPReductionPHIRecipe *FindLastIVPhiR, VPWidenIntOrFpInductionRecipe *WideIV,
1636 VPInstruction *MinOrMaxResult, VPInstruction *FindIVSelect,
1637 VPRecipeBase *FindIVCmp, VPInstruction *FindIVRdxResult) {
1638 assert(!FindLastIVPhiR->isInLoop() && !FindLastIVPhiR->isOrdered() &&
1639 "inloop and ordered reductions not supported");
1640 assert(FindLastIVPhiR->getVFScaleFactor() == 1 &&
1641 "FindIV reduction must not be scaled");
1642
1643 Type *Ty = Plan.getVectorLoopRegion()->getCanonicalIVType();
1644 // TODO: Support non-canonical (i.e., narrower than the canonical) IV types.
1645 // TODO: Emit remarks for failed transformations.
1646 if (Ty != VPTypeAnalysis(Plan).inferScalarType(WideIV))
1647 return false;
1648
1649 auto *FindIVSelectR = cast<VPSingleDefRecipe>(
1650 FindLastIVPhiR->getBackedgeValue()->getDefiningRecipe());
1651 assert(
1652 match(FindIVSelectR, m_Select(m_VPValue(), m_VPValue(), m_VPValue())) &&
1653 "backedge value must be a select");
1654 if (FindIVSelectR->getOperand(1) != WideIV &&
1655 FindIVSelectR->getOperand(2) != WideIV)
1656 return false;
1657
1658 // If the original wide IV is not canonical, create a new one. The canonical
1659 // wide IV is guaranteed to not wrap for all lanes that are active in the
1660 // vector loop.
1661 if (!WideIV->isCanonical()) {
1662 VPIRValue *Zero = Plan.getConstantInt(Ty, 0);
1663 VPIRValue *One = Plan.getConstantInt(Ty, 1);
1664 auto *WidenCanIV = new VPWidenIntOrFpInductionRecipe(
1665 nullptr, Zero, One, WideIV->getVFValue(),
1666 WideIV->getInductionDescriptor(),
1667 VPIRFlags::WrapFlagsTy(/*HasNUW=*/true, /*HasNSW=*/false),
1668 WideIV->getDebugLoc());
1669 WidenCanIV->insertBefore(WideIV);
1670
1671 // Update the select to use the wide canonical IV.
1672 FindIVSelectR->setOperand(FindIVSelectR->getOperand(1) == WideIV ? 1 : 2,
1673 WidenCanIV);
1674 }
1675 FindLastIVPhiR->setOperand(0, Plan.getOrAddLiveIn(PoisonValue::get(Ty)));
1676
1677 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
1678 // result:
1679 // 1. Find the first canonical indices corresponding to partial min/max
1680 // values, using loop reductions.
1681 // 2. Find which of the partial min/max values are equal to the overall
1682 // min/max value.
1683 // 3. Select among the canonical indices those corresponding to the overall
1684 // min/max value.
1685 // 4. Find the first canonical index of overall min/max and scale it back to
1686 // the original IV using VPDerivedIVRecipe.
1687 // 5. If the overall min/max equals the starting min/max, the condition in
1688 // the loop was always false, due to being strict; return the start value
1689 // of FindLastIVPhiR in that case.
1690 //
1691 // For example, we transform two independent reduction result computations
1692 // for
1693 //
1694 // <x1> vector loop: {
1695 // vector.body:
1696 // ...
1697 // ir<%iv> = WIDEN-INDUCTION nuw nsw ir<10>, ir<1>, vp<%0>
1698 // WIDEN-REDUCTION-PHI ir<%min.idx> = phi ir<sentinel.min.start>,
1699 // ir<%min.idx.next>
1700 // WIDEN-REDUCTION-PHI ir<%min.val> = phi ir<100>, ir<%min.val.next>
1701 // ....
1702 // WIDEN-INTRINSIC ir<%min.val.next> = call llvm.umin(ir<%min.val>, ir<%l>)
1703 // WIDEN ir<%min.idx.next> = select ir<%cmp>, ir<%iv>, ir<%min.idx>
1704 // ...
1705 // }
1706 // Successor(s): middle.block
1707 //
1708 // middle.block:
1709 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
1710 // vp<%min.result> = compute-reduction-result (umin) ir<%min.val.next>
1711 // vp<%cmp> = icmp ne vp<%iv.rdx>, ir<sentinel.min.start>
1712 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<10>
1713 //
1714 //
1715 // Into:
1716 //
1717 // vp<%reduced.min> = compute-reduction-result (umin) ir<%min.val.next>
1718 // vp<%reduced.mins.mask> = icmp eq ir<%min.val.next>, vp<%reduced.min>
1719 // vp<%idxs2reduce> = select vp<%reduced.mins.mask>, ir<%min.idx.next>,
1720 // ir<MaxUInt>
1721 // vp<%reduced.idx> = compute-reduction-result (umin) vp<%idxs2reduce>
1722 // vp<%scaled.idx> = DERIVED-IV ir<20> + vp<%reduced.idx> * ir<1>
1723 // vp<%always.false> = icmp eq vp<%reduced.min>, ir<100>
1724 // vp<%final.idx> = select vp<%always.false>, ir<10>,
1725 // vp<%scaled.idx>
1726
1727 VPBuilder Builder(FindIVRdxResult);
1728 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
1729 auto *FinalMinOrMaxCmp =
1730 Builder.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
1731 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
1732 VPValue *MaxIV =
1733 Plan.getConstantInt(APInt::getMaxValue(Ty->getIntegerBitWidth()));
1734 auto *FinalIVSelect =
1735 Builder.createSelect(FinalMinOrMaxCmp, LastIVExiting, MaxIV);
1736 VPIRFlags RdxFlags(RecurKind::UMin, false, false, FastMathFlags());
1737 VPSingleDefRecipe *FinalCanIV = Builder.createNaryOp(
1738 VPInstruction::ComputeReductionResult, {FinalIVSelect}, RdxFlags,
1739 FindIVRdxResult->getDebugLoc());
1740
1741 // If we used a new wide canonical IV, convert the reduction result back to
1742 // the original IV scale before the final select.
1743 if (!WideIV->isCanonical()) {
1744 auto *DerivedIVRecipe =
1745 new VPDerivedIVRecipe(InductionDescriptor::IK_IntInduction,
1746 nullptr, // No FPBinOp for integer induction
1747 WideIV->getStartValue(), FinalCanIV,
1748 WideIV->getStepValue(), "derived.iv.result");
1749 DerivedIVRecipe->insertBefore(&*Builder.getInsertPoint());
1750 FinalCanIV = DerivedIVRecipe;
1751 }
1752
1753 // If the final min/max value matches its start value, the condition in the
1754 // loop was always false, i.e. no induction value has been selected. If that's
1755 // the case, set the result of the IV reduction to its start value.
1756 VPValue *AlwaysFalse = Builder.createICmp(CmpInst::ICMP_EQ, MinOrMaxResult,
1757 MinOrMaxPhiR->getStartValue());
1758 VPValue *FinalIV = Builder.createSelect(
1759 AlwaysFalse, FindIVSelect->getOperand(2), FinalCanIV);
1760 FindIVSelect->replaceAllUsesWith(FinalIV);
1761
1762 // Erase the old FindIV result pattern which is now dead.
1763 FindIVSelect->eraseFromParent();
1764 FindIVCmp->eraseFromParent();
1765 FindIVRdxResult->eraseFromParent();
1766 return true;
1767}
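// Illustrative sketch (hypothetical source, not from the original file): the
// strict-predicate "first argmin" pattern handled above typically comes from
//
//   int MinIdx = 0;
//   int MinVal = A[0];
//   for (int I = 1; I < N; ++I)
//     if (A[I] < MinVal) { // strict '<': only a strictly smaller value
//       MinVal = A[I];     // updates the index, so the first occurrence of
//       MinIdx = I;        // the overall minimum wins
//     }
//
// i.e. an SMin reduction on MinVal paired with a FindLastIV reduction on
// MinIdx whose compare uses a strict predicate.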
1768
1769 bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan,
1770 OptimizationRemarkEmitter *ORE,
1771 Loop *TheLoop) {
1772 for (auto &PhiR : make_early_inc_range(
1773 Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) {
1774 auto *MinOrMaxPhiR = dyn_cast<VPReductionPHIRecipe>(&PhiR);
1775 // TODO: check for multi-uses in VPlan directly.
1776 if (!MinOrMaxPhiR || !MinOrMaxPhiR->hasUsesOutsideReductionChain())
1777 continue;
1778
1779 // MinOrMaxPhiR has users outside the reduction cycle in the loop. Check if
1780 // the only other user is a FindLastIV reduction. MinOrMaxPhiR must have
1781 // exactly 2 users:
1782 // 1) the min/max operation of the reduction cycle, and
1783 // 2) the compare of a FindLastIV reduction cycle. This compare must match
1784 // the min/max operation - comparing MinOrMaxPhiR with the operand of the
1785 // min/max operation, and be used only by the select of the FindLastIV
1786 // reduction cycle.
1787 RecurKind RdxKind = MinOrMaxPhiR->getRecurrenceKind();
1788 assert(
1790 "only min/max recurrences support users outside the reduction chain");
1791
1792 auto *MinOrMaxOp =
1793 dyn_cast<VPRecipeWithIRFlags>(MinOrMaxPhiR->getBackedgeValue());
1794 if (!MinOrMaxOp)
1795 return false;
1796
1797 // Check that MinOrMaxOp is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1798 // with an intrinsic that matches the reduction kind.
1799 Intrinsic::ID ExpectedIntrinsicID = getMinMaxReductionIntrinsicOp(RdxKind);
1800 if (!match(MinOrMaxOp, m_Intrinsic(ExpectedIntrinsicID)))
1801 return false;
1802
1803 // MinOrMaxOp must have 2 users: 1) MinOrMaxPhiR and 2)
1804 // ComputeReductionResult.
1805 assert(MinOrMaxOp->getNumUsers() == 2 &&
1806 "MinOrMaxOp must have exactly 2 users");
1807 VPValue *MinOrMaxOpValue = MinOrMaxOp->getOperand(0);
1808 if (MinOrMaxOpValue == MinOrMaxPhiR)
1809 MinOrMaxOpValue = MinOrMaxOp->getOperand(1);
1810
1811 VPValue *CmpOpA;
1812 VPValue *CmpOpB;
1813 CmpPredicate Pred;
1814 auto *Cmp = dyn_cast_or_null<VPSingleDefRecipe>(vputils::findUserOf(
1815 MinOrMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
1816 if (!Cmp || Cmp->getNumUsers() != 1 ||
1817 (CmpOpA != MinOrMaxOpValue && CmpOpB != MinOrMaxOpValue))
1818 return false;
1819
1820 if (MinOrMaxOpValue != CmpOpB)
1821 Pred = CmpInst::getSwappedPredicate(Pred);
1822
1823 // MinOrMaxPhiR must have exactly 2 users:
1824 // * MinOrMaxOp,
1825 // * Cmp (that's part of a FindLastIV chain).
1826 if (MinOrMaxPhiR->getNumUsers() != 2)
1827 return false;
1828
1829 VPInstruction *MinOrMaxResult =
1831 assert(is_contained(MinOrMaxPhiR->users(), MinOrMaxOp) &&
1832 "one user must be MinOrMaxOp");
1833 assert(MinOrMaxResult && "MinOrMaxResult must be a user of MinOrMaxOp");
1834
1835 // Cmp must be used by the select of a FindLastIV chain.
1836 VPValue *Sel = dyn_cast<VPSingleDefRecipe>(Cmp->getSingleUser());
1837 VPValue *IVOp, *FindIV;
1838 if (!Sel || Sel->getNumUsers() != 2 ||
1839 !match(Sel,
1841 return false;
1842
1844 std::swap(FindIV, IVOp);
1845 Pred = CmpInst::getInversePredicate(Pred);
1846 }
1847
1848 auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
1849 if (!FindIVPhiR || !RecurrenceDescriptor::isFindIVRecurrenceKind(
1850 FindIVPhiR->getRecurrenceKind()))
1851 return false;
1852
1853 assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
1854 "cannot handle inloop/ordered reductions yet");
1855
1856 // Check if FindIVPhiR is a FindLast pattern by checking the MinMaxKind
1857 // on its ComputeReductionResult. SMax/UMax indicates FindLast.
1858 VPInstruction *FindIVResult =
1860 FindIVPhiR->getBackedgeValue());
1861 assert(FindIVResult &&
1862 "must be able to retrieve the FindIVResult VPInstruction");
1863 RecurKind FindIVMinMaxKind = FindIVResult->getRecurKind();
1864 if (FindIVMinMaxKind != RecurKind::SMax &&
1865 FindIVMinMaxKind != RecurKind::UMax)
1866 return false;
1867
1868 // TODO: Support cases where IVOp is the IV increment.
1869 if (!match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) ||
1871 return false;
1872
1873 // Check if the predicate is compatible with the reduction kind.
1874 bool IsValidKindPred = [RdxKind, Pred]() {
1875 switch (RdxKind) {
1876 case RecurKind::UMin:
1877 return Pred == CmpInst::ICMP_UGE || Pred == CmpInst::ICMP_UGT;
1878 case RecurKind::UMax:
1879 return Pred == CmpInst::ICMP_ULE || Pred == CmpInst::ICMP_ULT;
1880 case RecurKind::SMax:
1881 return Pred == CmpInst::ICMP_SLE || Pred == CmpInst::ICMP_SLT;
1882 case RecurKind::SMin:
1883 return Pred == CmpInst::ICMP_SGE || Pred == CmpInst::ICMP_SGT;
1884 default:
1885 llvm_unreachable("unhandled recurrence kind");
1886 }
1887 }();
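// Roughly (illustrative): for a UMin reduction %min with incoming value %x,
// the compare guarding the index select must reduce to %min >= %x (UGE) or
// %min > %x (UGT). The non-strict form also updates the index on ties, so the
// final index is the last occurrence of the minimum; the strict form only
// updates on a strictly smaller value and is handled below via
// handleFirstArgMinOrMax as a first-argmin.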
1888 if (!IsValidKindPred) {
1889 ORE->emit([&]() {
1891 DEBUG_TYPE, "VectorizationMultiUseReductionPredicate",
1892 TheLoop->getStartLoc(), TheLoop->getHeader())
1893 << "Multi-use reduction with predicate "
1895 << " incompatible with reduction kind";
1896 });
1897 return false;
1898 }
1899
1900 auto *FindIVSelect = findFindIVSelect(FindIVPhiR->getBackedgeValue());
1901 auto *FindIVCmp = FindIVSelect->getOperand(0)->getDefiningRecipe();
1902 auto *FindIVRdxResult = cast<VPInstruction>(FindIVCmp->getOperand(0));
1903 assert(FindIVSelect->getParent() == MinOrMaxResult->getParent() &&
1904 "both results must be computed in the same block");
1905 // The reduction to the scalar min/max value is moved right before the
1906 // reduction to its scalar iteration index, so that the instructions emitted
1907 // next can use both results.
1908 MinOrMaxResult->moveBefore(*FindIVRdxResult->getParent(),
1909 FindIVRdxResult->getIterator());
1910
1911 bool IsStrictPredicate = ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred);
1912 if (IsStrictPredicate) {
1913 if (!handleFirstArgMinOrMax(Plan, MinOrMaxPhiR, FindIVPhiR,
1915 MinOrMaxResult, FindIVSelect, FindIVCmp,
1916 FindIVRdxResult))
1917 return false;
1918 continue;
1919 }
1920
1921 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
1922 // result:
1923 // 1. We need to find the last IV for which the condition based on the
1924 // min/max recurrence is true,
1925 // 2. Compare the partial min/max reduction result to its final value and,
1926 // 3. Select the lanes of the partial FindLastIV reductions which
1927 // correspond to the lanes matching the min/max reduction result.
1928 //
1929 // For example, this transforms
1930 // vp<%min.result> = compute-reduction-result ir<%min.val.next>
1931 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
1932 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
1933 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
1934 //
1935 // into:
1936 //
1937 // vp<min.result> = compute-reduction-result ir<%min.val.next>
1938 // vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp<min.result>
1939 // vp<%final.iv> = select vp<%final.min.cmp>, vp<%min.idx.next>, SENTINEL
1940 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%final.iv>
1941 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
1942 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
1943 //
1944 VPBuilder B(FindIVRdxResult);
1945 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
1946 auto *FinalMinOrMaxCmp =
1947 B.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
1948 VPValue *Sentinel = FindIVCmp->getOperand(1);
1949 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
1950 auto *FinalIVSelect =
1951 B.createSelect(FinalMinOrMaxCmp, LastIVExiting, Sentinel);
1952 FindIVRdxResult->setOperand(0, FinalIVSelect);
1953 }
1954 return true;
1955}
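// Illustrative sketch (hypothetical source, not from the original file): the
// non-strict variant handled directly above records the last index of the
// overall minimum, e.g.
//
//   int MinIdx = 0;
//   int MinVal = A[0];
//   for (int I = 1; I < N; ++I)
//     if (A[I] <= MinVal) { // non-strict '<=': the index also updates on
//       MinVal = A[I];      // ties, so the last occurrence wins
//       MinIdx = I;
//     }
//
// The rewritten middle block keeps only the lanes whose partial minimum equals
// the overall minimum and then takes the largest (SMax/UMax) of their indices.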