LLVM 22.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
21#include "VPlanCFG.h"
22#include "VPlanDominatorTree.h"
23#include "VPlanHelpers.h"
24#include "VPlanPatternMatch.h"
25#include "VPlanTransforms.h"
26#include "VPlanUtils.h"
28#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/Twine.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/CFG.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Type.h"
40#include "llvm/IR/Value.h"
43#include "llvm/Support/Debug.h"
49#include <cassert>
50#include <string>
51
52using namespace llvm;
53using namespace llvm::VPlanPatternMatch;
54
55namespace llvm {
57}
58
59/// @{
60/// Metadata attribute names
61const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
63 "llvm.loop.vectorize.followup_vectorized";
65 "llvm.loop.vectorize.followup_epilogue";
66/// @}
67
69
71 "vplan-print-in-dot-format", cl::Hidden,
72 cl::desc("Use dot format instead of plain text when dumping VPlans"));
73
74#define DEBUG_TYPE "loop-vectorize"
75
76#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
78 const VPBasicBlock *Parent = R.getParent();
79 VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr);
80 R.print(OS, "", SlotTracker);
81 return OS;
82}
83#endif
84
86 const ElementCount &VF) const {
87 switch (LaneKind) {
89 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
90 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
91 Builder.getInt32(VF.getKnownMinValue() - Lane));
93 return Builder.getInt32(Lane);
94 }
95 llvm_unreachable("Unknown lane kind");
96}
97
98VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
99 : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
100 if (Def)
101 Def->addDefinedValue(this);
102}
103
105 assert(Users.empty() && "trying to delete a VPValue with remaining users");
106 if (Def)
107 Def->removeDefinedValue(this);
108}
109
110#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
113 R->print(OS, "", SlotTracker);
114 else
116}
117
118void VPValue::dump() const {
119 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
121 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
123 dbgs() << "\n";
124}
125
126void VPDef::dump() const {
127 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
129 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
130 print(dbgs(), "", SlotTracker);
131 dbgs() << "\n";
132}
133#endif
134
138
142
143// Get the top-most entry block of \p Start. This is the entry block of the
144// containing VPlan. This function is templated to support both const and non-const blocks
145template <typename T> static T *getPlanEntry(T *Start) {
146 T *Next = Start;
147 T *Current = Start;
148 while ((Next = Next->getParent()))
149 Current = Next;
150
151 SmallSetVector<T *, 8> WorkList;
152 WorkList.insert(Current);
153
154 for (unsigned i = 0; i < WorkList.size(); i++) {
155 T *Current = WorkList[i];
156 if (!Current->hasPredecessors())
157 return Current;
158 auto &Predecessors = Current->getPredecessors();
159 WorkList.insert_range(Predecessors);
160 }
161
162 llvm_unreachable("VPlan without any entry node without predecessors");
163}
164
165VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
166
167const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
168
169/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
176
183
184void VPBlockBase::setPlan(VPlan *ParentPlan) {
185 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
186 Plan = ParentPlan;
187}
188
189/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
191 const VPBlockBase *Block = this;
193 Block = Region->getExiting();
195}
196
203
205 if (!Successors.empty() || !Parent)
206 return this;
207 assert(Parent->getExiting() == this &&
208 "Block w/o successors not the exiting block of its parent.");
209 return Parent->getEnclosingBlockWithSuccessors();
210}
211
213 if (!Predecessors.empty() || !Parent)
214 return this;
215 assert(Parent->getEntry() == this &&
216 "Block w/o predecessors not the entry of its parent.");
217 return Parent->getEnclosingBlockWithPredecessors();
218}
219
221 iterator It = begin();
222 while (It != end() && It->isPhi())
223 It++;
224 return It;
225}
226
234
236 if (Def->isLiveIn())
237 return Def->getLiveInIRValue();
238
239 if (hasScalarValue(Def, Lane))
240 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
241
242 if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) &&
244 return Data.VPV2Scalars[Def][0];
245 }
246
247 // Look through BuildVector to avoid redundant extracts.
248 // TODO: Remove once replicate regions are unrolled explicitly.
249 if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
250 auto *BuildVector = cast<VPInstruction>(Def);
251 return get(BuildVector->getOperand(Lane.getKnownLane()), true);
252 }
253
255 auto *VecPart = Data.VPV2Vector[Def];
256 if (!VecPart->getType()->isVectorTy()) {
257 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
258 return VecPart;
259 }
260 // TODO: Cache created scalar values.
261 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
262 auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
263 // set(Def, Extract, Instance);
264 return Extract;
265}
266
267Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
268 if (NeedsScalar) {
269 assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
271 (hasScalarValue(Def, VPLane(0)) &&
272 Data.VPV2Scalars[Def].size() == 1)) &&
273 "Trying to access a single scalar per part but has multiple scalars "
274 "per part.");
275 return get(Def, VPLane(0));
276 }
277
278 // If Values have been set for this Def return the one relevant for \p Part.
279 if (hasVectorValue(Def))
280 return Data.VPV2Vector[Def];
281
282 auto GetBroadcastInstrs = [this](Value *V) {
283 if (VF.isScalar())
284 return V;
285 // Broadcast the scalar into all locations in the vector.
286 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
287 return Shuf;
288 };
289
290 if (!hasScalarValue(Def, {0})) {
291 assert(Def->isLiveIn() && "expected a live-in");
292 Value *IRV = Def->getLiveInIRValue();
293 Value *B = GetBroadcastInstrs(IRV);
294 set(Def, B);
295 return B;
296 }
297
298 Value *ScalarValue = get(Def, VPLane(0));
299 // If we aren't vectorizing, we can just copy the scalar map values over
300 // to the vector map.
301 if (VF.isScalar()) {
302 set(Def, ScalarValue);
303 return ScalarValue;
304 }
305
306 bool IsSingleScalar = vputils::isSingleScalar(Def);
307 VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
308
309 // We need to construct the vector value for a single-scalar value by
310 // broadcasting the scalar to all lanes.
311 // TODO: Replace by introducing Broadcast VPInstructions.
312 assert(IsSingleScalar && "must be a single-scalar at this point");
313 // Set the insert point after the last scalarized instruction or after the
314 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
315 // will directly follow the scalar definitions.
316 auto OldIP = Builder.saveIP();
317 auto *LastInst = cast<Instruction>(get(Def, LastLane));
318 auto NewIP = isa<PHINode>(LastInst)
319 ? LastInst->getParent()->getFirstNonPHIIt()
320 : std::next(BasicBlock::iterator(LastInst));
321 Builder.SetInsertPoint(&*NewIP);
322 Value *VectorValue = GetBroadcastInstrs(ScalarValue);
323 set(Def, VectorValue);
324 Builder.restoreIP(OldIP);
325 return VectorValue;
326}
327
329 const DILocation *DIL = DL;
330 // When a FSDiscriminator is enabled, we don't need to add the multiply
331 // factors to the discriminators.
332 if (DIL &&
333 Builder.GetInsertBlock()
334 ->getParent()
335 ->shouldEmitDebugInfoForProfiling() &&
337 // FIXME: For scalable vectors, assume vscale=1.
338 unsigned UF = Plan->getUF();
339 auto NewDIL =
340 DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
341 if (NewDIL)
342 Builder.SetCurrentDebugLocation(*NewDIL);
343 else
344 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
345 << DIL->getFilename() << " Line: " << DIL->getLine());
346 } else
347 Builder.SetCurrentDebugLocation(DL);
348}
349
351 Value *WideValue,
352 const VPLane &Lane) {
353 Value *ScalarInst = get(Def, Lane);
354 Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF);
355 if (auto *StructTy = dyn_cast<StructType>(WideValue->getType())) {
356 // We must handle each element of a vectorized struct type.
357 for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) {
358 Value *ScalarValue = Builder.CreateExtractValue(ScalarInst, I);
359 Value *VectorValue = Builder.CreateExtractValue(WideValue, I);
360 VectorValue =
361 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
362 WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I);
363 }
364 } else {
365 WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
366 }
367 return WideValue;
368}
369
370BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
371 auto &CFG = State.CFG;
372 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
373 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
374 BasicBlock *PrevBB = CFG.PrevBB;
375 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
376 PrevBB->getParent(), CFG.ExitBB);
377 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
378
379 return NewBB;
380}
381
383 auto &CFG = State.CFG;
384 BasicBlock *NewBB = CFG.VPBB2IRBB[this];
385
386 // Register NewBB in its loop. In innermost loops its the same for all
387 // BB's.
388 Loop *ParentLoop = State.CurrentParentLoop;
389 // If this block has a sole successor that is an exit block or is an exit
390 // block itself then it needs adding to the same parent loop as the exit
391 // block.
392 VPBlockBase *SuccOrExitVPB = getSingleSuccessor();
393 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this;
394 if (State.Plan->isExitBlock(SuccOrExitVPB)) {
395 ParentLoop = State.LI->getLoopFor(
396 cast<VPIRBasicBlock>(SuccOrExitVPB)->getIRBasicBlock());
397 }
398
399 if (ParentLoop && !State.LI->getLoopFor(NewBB))
400 ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
401
403 if (VPBlockUtils::isHeader(this, State.VPDT)) {
404 // There's no block for the latch yet, connect to the preheader only.
405 Preds = {getPredecessors()[0]};
406 } else {
407 Preds = to_vector(getPredecessors());
408 }
409
410 // Hook up the new basic block to its predecessors.
411 for (VPBlockBase *PredVPBlock : Preds) {
412 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
413 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
414 assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
415 "Predecessor basic-block not found building successor.");
416 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
417 auto *PredBBTerminator = PredBB->getTerminator();
418 LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
419
420 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
421 if (isa<UnreachableInst>(PredBBTerminator)) {
422 assert(PredVPSuccessors.size() == 1 &&
423 "Predecessor ending w/o branch must have single successor.");
424 DebugLoc DL = PredBBTerminator->getDebugLoc();
425 PredBBTerminator->eraseFromParent();
426 auto *Br = BranchInst::Create(NewBB, PredBB);
427 Br->setDebugLoc(DL);
428 } else if (TermBr && !TermBr->isConditional()) {
429 TermBr->setSuccessor(0, NewBB);
430 } else {
431 // Set each forward successor here when it is created, excluding
432 // backedges. A backward successor is set when the branch is created.
433 // Branches to VPIRBasicBlocks must have the same successors in VPlan as
434 // in the original IR, except when the predecessor is the entry block.
435 // This enables including SCEV and memory runtime check blocks in VPlan.
436 // TODO: Remove exception by modeling the terminator of entry block using
437 // BranchOnCond.
438 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
439 assert((TermBr && (!TermBr->getSuccessor(idx) ||
440 (isa<VPIRBasicBlock>(this) &&
441 (TermBr->getSuccessor(idx) == NewBB ||
442 PredVPBlock == getPlan()->getEntry())))) &&
443 "Trying to reset an existing successor block.");
444 TermBr->setSuccessor(idx, NewBB);
445 }
446 CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
447 }
448}
449
452 "VPIRBasicBlock can have at most two successors at the moment!");
453 // Move completely disconnected blocks to their final position.
454 if (IRBB->hasNPredecessors(0) && succ_begin(IRBB) == succ_end(IRBB))
455 IRBB->moveAfter(State->CFG.PrevBB);
456 State->Builder.SetInsertPoint(IRBB->getTerminator());
457 State->CFG.PrevBB = IRBB;
458 State->CFG.VPBB2IRBB[this] = IRBB;
459 executeRecipes(State, IRBB);
460 // Create a branch instruction to terminate IRBB if one was not created yet
461 // and is needed.
462 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
463 auto *Br = State->Builder.CreateBr(IRBB);
464 Br->setOperand(0, nullptr);
465 IRBB->getTerminator()->eraseFromParent();
466 } else {
467 assert(
468 (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
469 "other blocks must be terminated by a branch");
470 }
471
472 connectToPredecessors(*State);
473}
474
475VPIRBasicBlock *VPIRBasicBlock::clone() {
476 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
477 for (VPRecipeBase &R : Recipes)
478 NewBlock->appendRecipe(R.clone());
479 return NewBlock;
480}
481
483 bool Replica = bool(State->Lane);
484 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
485
486 if (VPBlockUtils::isHeader(this, State->VPDT)) {
487 // Create and register the new vector loop.
488 Loop *PrevParentLoop = State->CurrentParentLoop;
489 State->CurrentParentLoop = State->LI->AllocateLoop();
490
491 // Insert the new loop into the loop nest and register the new basic blocks
492 // before calling any utilities such as SCEV that require valid LoopInfo.
493 if (PrevParentLoop)
494 PrevParentLoop->addChildLoop(State->CurrentParentLoop);
495 else
496 State->LI->addTopLevelLoop(State->CurrentParentLoop);
497 }
498
499 auto IsReplicateRegion = [](VPBlockBase *BB) {
501 assert((!R || R->isReplicator()) &&
502 "only replicate region blocks should remain");
503 return R;
504 };
505 // 1. Create an IR basic block.
506 if ((Replica && this == getParent()->getEntry()) ||
507 IsReplicateRegion(getSingleHierarchicalPredecessor())) {
508 // Reuse the previous basic block if the current VPBB is either
509 // * the entry to a replicate region, or
510 // * the exit of a replicate region.
511 State->CFG.VPBB2IRBB[this] = NewBB;
512 } else {
513 NewBB = createEmptyBasicBlock(*State);
514
515 State->Builder.SetInsertPoint(NewBB);
516 // Temporarily terminate with unreachable until CFG is rewired.
517 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
518 State->Builder.SetInsertPoint(Terminator);
519
520 State->CFG.PrevBB = NewBB;
521 State->CFG.VPBB2IRBB[this] = NewBB;
522 connectToPredecessors(*State);
523 }
524
525 // 2. Fill the IR basic block with IR instructions.
526 executeRecipes(State, NewBB);
527
528 // If this block is a latch, update CurrentParentLoop.
529 if (VPBlockUtils::isLatch(this, State->VPDT))
530 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
531}
532
533VPBasicBlock *VPBasicBlock::clone() {
534 auto *NewBlock = getPlan()->createVPBasicBlock(getName());
535 for (VPRecipeBase &R : *this)
536 NewBlock->appendRecipe(R.clone());
537 return NewBlock;
538}
539
541 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName()
542 << " in BB: " << BB->getName() << '\n');
543
544 State->CFG.PrevVPBB = this;
545
546 for (VPRecipeBase &Recipe : Recipes) {
547 State->setDebugLocFrom(Recipe.getDebugLoc());
548 Recipe.execute(*State);
549 }
550
551 LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB);
552}
553
554VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
555 assert((SplitAt == end() || SplitAt->getParent() == this) &&
556 "can only split at a position in the same block");
557
558 // Create new empty block after the block to split.
559 auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
561
562 // Finally, move the recipes starting at SplitAt to new block.
563 for (VPRecipeBase &ToMove :
564 make_early_inc_range(make_range(SplitAt, this->end())))
565 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
566
567 return SplitBlock;
568}
569
570/// Return the enclosing loop region for region \p P. The templated version is
571/// used to support both const and non-const block arguments.
572template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
573 if (P && P->isReplicator()) {
574 P = P->getParent();
575 // Multiple loop regions can be nested, but replicate regions can only be
576 // nested inside a loop region or must be outside any other region.
577 assert((!P || !P->isReplicator()) && "unexpected nested replicate regions");
578 }
579 return P;
580}
581
585
589
590static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
591 if (VPBB->empty()) {
592 assert(
593 VPBB->getNumSuccessors() < 2 &&
594 "block with multiple successors doesn't have a recipe as terminator");
595 return false;
596 }
597
598 const VPRecipeBase *R = &VPBB->back();
599 bool IsSwitch = isa<VPInstruction>(R) &&
600 cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
601 bool IsCondBranch =
604 (void)IsCondBranch;
605 (void)IsSwitch;
606 if (VPBB->getNumSuccessors() == 2 ||
607 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
608 assert((IsCondBranch || IsSwitch) &&
609 "block with multiple successors not terminated by "
610 "conditional branch nor switch recipe");
611
612 return true;
613 }
614
615 if (VPBB->getNumSuccessors() > 2) {
616 assert(IsSwitch && "block with more than 2 successors not terminated by "
617 "a switch recipe");
618 return true;
619 }
620
621 assert(
622 !IsCondBranch &&
623 "block with 0 or 1 successors terminated by conditional branch recipe");
624 return false;
625}
626
628 if (hasConditionalTerminator(this))
629 return &back();
630 return nullptr;
631}
632
634 if (hasConditionalTerminator(this))
635 return &back();
636 return nullptr;
637}
638
640 return getParent() && getParent()->getExitingBasicBlock() == this;
641}
642
643#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
648
649void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
650 if (getSuccessors().empty()) {
651 O << Indent << "No successors\n";
652 } else {
653 O << Indent << "Successor(s): ";
654 ListSeparator LS;
655 for (auto *Succ : getSuccessors())
656 O << LS << Succ->getName();
657 O << '\n';
658 }
659}
660
661void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
662 VPSlotTracker &SlotTracker) const {
663 O << Indent << getName() << ":\n";
664
665 auto RecipeIndent = Indent + " ";
666 for (const VPRecipeBase &Recipe : *this) {
667 Recipe.print(O, RecipeIndent, SlotTracker);
668 O << '\n';
669 }
670
671 printSuccessors(O, Indent);
672}
673#endif
674
675static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
676
677// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
678// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
679// Remapping of operands must be done separately. Returns a pair with the new
680// entry and exiting blocks of the cloned region. If \p Entry isn't part of a
681// region, return nullptr for the exiting block.
682static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
684 VPBlockBase *Exiting = nullptr;
685 bool InRegion = Entry->getParent();
686 // First, clone blocks reachable from Entry.
687 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
688 VPBlockBase *NewBB = BB->clone();
689 Old2NewVPBlocks[BB] = NewBB;
690 if (InRegion && BB->getNumSuccessors() == 0) {
691 assert(!Exiting && "Multiple exiting blocks?");
692 Exiting = BB;
693 }
694 }
695 assert((!InRegion || Exiting) && "regions must have a single exiting block");
696
697 // Second, update the predecessors & successors of the cloned blocks.
698 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
699 VPBlockBase *NewBB = Old2NewVPBlocks[BB];
701 for (VPBlockBase *Pred : BB->getPredecessors()) {
702 NewPreds.push_back(Old2NewVPBlocks[Pred]);
703 }
704 NewBB->setPredecessors(NewPreds);
706 for (VPBlockBase *Succ : BB->successors()) {
707 NewSuccs.push_back(Old2NewVPBlocks[Succ]);
708 }
709 NewBB->setSuccessors(NewSuccs);
710 }
711
712#if !defined(NDEBUG)
713 // Verify that the order of predecessors and successors matches in the cloned
714 // version.
715 for (const auto &[OldBB, NewBB] :
717 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
718 for (const auto &[OldPred, NewPred] :
719 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
720 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
721
722 for (const auto &[OldSucc, NewSucc] :
723 zip(OldBB->successors(), NewBB->successors()))
724 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
725 }
726#endif
727
728 return std::make_pair(Old2NewVPBlocks[Entry],
729 Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
730}
731
732VPRegionBlock *VPRegionBlock::clone() {
733 const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
734 VPlan &Plan = *getPlan();
735 VPRegionBlock *NewRegion =
737 ? Plan.createReplicateRegion(NewEntry, NewExiting, getName())
738 : Plan.createLoopRegion(getName(), NewEntry, NewExiting);
739
740 for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
741 Block->setParent(NewRegion);
742 return NewRegion;
743}
744
747 "Loop regions should have been lowered to plain CFG");
748 assert(!State->Lane && "Replicating a Region with non-null instance.");
749 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
750
752 Entry);
753 State->Lane = VPLane(0);
754 for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
755 State->Lane = VPLane(Lane, VPLane::Kind::First);
756 // Visit the VPBlocks connected to \p this, starting from it.
757 for (VPBlockBase *Block : RPOT) {
758 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
759 Block->execute(State);
760 }
761 }
762
763 // Exit replicating mode.
764 State->Lane.reset();
765}
766
769 for (VPRecipeBase &R : Recipes)
770 Cost += R.cost(VF, Ctx);
771 return Cost;
772}
773
774const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
775 const VPBlockBase *Pred = nullptr;
776 if (hasPredecessors()) {
777 Pred = getPredecessors()[Idx];
778 } else {
779 auto *Region = getParent();
780 assert(Region && !Region->isReplicator() && Region->getEntry() == this &&
781 "must be in the entry block of a non-replicate region");
782 assert(Idx < 2 && Region->getNumPredecessors() == 1 &&
783 "loop region has a single predecessor (preheader), its entry block "
784 "has 2 incoming blocks");
785
786 // Idx == 0 selects the predecessor of the region, Idx == 1 selects the
787 // region itself whose exiting block feeds the phi across the backedge.
788 Pred = Idx == 0 ? Region->getSinglePredecessor() : Region;
789 }
790 return Pred->getExitingBasicBlock();
791}
792
794 if (!isReplicator()) {
797 Cost += Block->cost(VF, Ctx);
798 InstructionCost BackedgeCost =
799 ForceTargetInstructionCost.getNumOccurrences()
800 ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences())
801 : Ctx.TTI.getCFInstrCost(Instruction::Br, Ctx.CostKind);
802 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
803 << ": vector loop backedge\n");
804 Cost += BackedgeCost;
805 return Cost;
806 }
807
808 // Compute the cost of a replicate region. Replicating isn't supported for
809 // scalable vectors, return an invalid cost for them.
810 // TODO: Discard scalable VPlans with replicate recipes earlier after
811 // construction.
812 if (VF.isScalable())
814
815 // Compute and return the cost of the conditionally executed recipes.
816 assert(VF.isVector() && "Can only compute vector cost at the moment.");
818 return Then->cost(VF, Ctx);
819}
820
821#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
823 VPSlotTracker &SlotTracker) const {
824 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
825 auto NewIndent = Indent + " ";
826 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
827 O << '\n';
828 BlockBase->print(O, NewIndent, SlotTracker);
829 }
830 O << Indent << "}\n";
831
832 printSuccessors(O, Indent);
833}
834#endif
835
837 auto *Header = cast<VPBasicBlock>(getEntry());
838 if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
839 assert(this == getPlan()->getVectorLoopRegion() &&
840 "Canonical IV must be in the entry of the top-level loop region");
841 auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
842 {CanIV->getStartValue(), CanIV->getBackedgeValue()},
843 CanIV->getDebugLoc(), "index");
844 CanIV->replaceAllUsesWith(ScalarR);
845 CanIV->eraseFromParent();
846 }
847
848 VPBlockBase *Preheader = getSinglePredecessor();
849 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
851 VPBlockUtils::disconnectBlocks(Preheader, this);
852 VPBlockUtils::disconnectBlocks(this, Middle);
853
854 for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
855 VPB->setParent(getParent());
856
857 VPBlockUtils::connectBlocks(Preheader, Header);
858 VPBlockUtils::connectBlocks(ExitingLatch, Middle);
859 VPBlockUtils::connectBlocks(ExitingLatch, Header);
860}
861
862VPlan::VPlan(Loop *L) {
863 setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
864 ScalarHeader = createVPIRBasicBlock(L->getHeader());
865
866 SmallVector<BasicBlock *> IRExitBlocks;
867 L->getUniqueExitBlocks(IRExitBlocks);
868 for (BasicBlock *EB : IRExitBlocks)
869 ExitBlocks.push_back(createVPIRBasicBlock(EB));
870}
871
873 VPValue DummyValue;
874
875 for (auto *VPB : CreatedBlocks) {
876 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
877 // Replace all operands of recipes and all VPValues defined in VPBB with
878 // DummyValue so the block can be deleted.
879 for (VPRecipeBase &R : *VPBB) {
880 for (auto *Def : R.definedValues())
881 Def->replaceAllUsesWith(&DummyValue);
882
883 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
884 R.setOperand(I, &DummyValue);
885 }
886 }
887 delete VPB;
888 }
889 for (VPValue *VPV : getLiveIns())
890 delete VPV;
891 if (BackedgeTakenCount)
892 delete BackedgeTakenCount;
893}
894
896 auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
897 return VPIRBB->getIRBasicBlock() == IRBB;
898 });
899 assert(Iter != getExitBlocks().end() && "no exit block found");
900 return *Iter;
901}
902
904 return is_contained(ExitBlocks, VPBB);
905}
906
907/// Generate the code inside the preheader and body of the vectorized loop.
908/// Assumes a single pre-header basic-block was created for this. Introduce
909/// additional basic-blocks as needed, and fill them all.
911 // Initialize CFG state.
912 State->CFG.PrevVPBB = nullptr;
913 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
914
915 // Update VPDominatorTree since VPBasicBlock may be removed after State was
916 // constructed.
917 State->VPDT.recalculate(*this);
918
919 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
920 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
921 cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
922 State->CFG.DTU.applyUpdates(
923 {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
924
925 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
926 << ", UF=" << getUF() << '\n');
927 setName("Final VPlan");
928 LLVM_DEBUG(dump());
929
930 BasicBlock *ScalarPh = State->CFG.ExitBB;
931 VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
932 if (ScalarPhVPBB->hasPredecessors()) {
933 // Disconnect scalar preheader and scalar header, as the dominator tree edge
934 // will be updated as part of VPlan execution. This allows keeping the DTU
935 // logic generic during VPlan execution.
936 State->CFG.DTU.applyUpdates(
937 {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
938 }
940 Entry);
941 // Generate code for the VPlan, in parts of the vector skeleton, loop body and
942 // successor blocks including the middle, exit and scalar preheader blocks.
943 for (VPBlockBase *Block : RPOT)
944 Block->execute(State);
945
946 // If the original loop is unreachable, delete it and all its blocks.
947 if (!ScalarPhVPBB->hasPredecessors()) {
948 // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
949 // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
950 // IR instructions.
951 for (VPIRBasicBlock *EB : getExitBlocks()) {
952 for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
953 if (R.getNumOperands() == 1)
954 R.eraseFromParent();
955 }
956 }
957
958 Loop *OrigLoop =
959 State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
960 auto Blocks = OrigLoop->getBlocksVector();
961 Blocks.push_back(cast<VPIRBasicBlock>(ScalarPhVPBB)->getIRBasicBlock());
962 for (auto *BB : Blocks)
963 State->LI->removeBlock(BB);
964 DeleteDeadBlocks(Blocks, &State->CFG.DTU);
965 State->LI->erase(OrigLoop);
966 }
967
968 State->CFG.DTU.flush();
969
970 VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
971 if (!Header)
972 return;
973
974 auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
975 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
976
977 // Fix the latch value of canonical, reduction and first-order recurrences
978 // phis in the vector loop.
979 for (VPRecipeBase &R : Header->phis()) {
980 // Skip phi-like recipes that generate their backedege values themselves.
981 if (isa<VPWidenPHIRecipe>(&R))
982 continue;
983
984 auto *PhiR = cast<VPSingleDefRecipe>(&R);
985 // VPInstructions currently model scalar Phis only.
986 bool NeedsScalar = isa<VPInstruction>(PhiR) ||
988 cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
989
990 Value *Phi = State->get(PhiR, NeedsScalar);
991 // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
992 // not.
993 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
994 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
995 }
996}
997
999 // For now only return the cost of the vector loop region, ignoring any other
1000 // blocks, like the preheader or middle blocks, expect for checking them for
1001 // recipes with invalid costs.
1003
1004 // If the cost of the loop region is invalid or any recipe in the skeleton
1005 // outside loop regions are invalid return an invalid cost.
1008 [&VF, &Ctx](VPBasicBlock *VPBB) {
1009 return !VPBB->cost(VF, Ctx).isValid();
1010 }))
1012
1013 return Cost;
1014}
1015
1017 // TODO: Cache if possible.
1019 if (auto *R = dyn_cast<VPRegionBlock>(B))
1020 return R->isReplicator() ? nullptr : R;
1021 return nullptr;
1022}
1023
1026 if (auto *R = dyn_cast<VPRegionBlock>(B))
1027 return R->isReplicator() ? nullptr : R;
1028 return nullptr;
1029}
1030
1031#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1034
1035 if (VF.getNumUsers() > 0) {
1036 O << "\nLive-in ";
1037 VF.printAsOperand(O, SlotTracker);
1038 O << " = VF";
1039 }
1040
1041 if (VFxUF.getNumUsers() > 0) {
1042 O << "\nLive-in ";
1043 VFxUF.printAsOperand(O, SlotTracker);
1044 O << " = VF * UF";
1045 }
1046
1047 if (VectorTripCount.getNumUsers() > 0) {
1048 O << "\nLive-in ";
1049 VectorTripCount.printAsOperand(O, SlotTracker);
1050 O << " = vector-trip-count";
1051 }
1052
1053 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1054 O << "\nLive-in ";
1055 BackedgeTakenCount->printAsOperand(O, SlotTracker);
1056 O << " = backedge-taken count";
1057 }
1058
1059 O << "\n";
1060 if (TripCount) {
1061 if (TripCount->isLiveIn())
1062 O << "Live-in ";
1063 TripCount->printAsOperand(O, SlotTracker);
1064 O << " = original trip-count";
1065 O << "\n";
1066 }
1067}
1068
1072
1073 O << "VPlan '" << getName() << "' {";
1074
1075 printLiveIns(O);
1076
1078 RPOT(getEntry());
1079 for (const VPBlockBase *Block : RPOT) {
1080 O << '\n';
1081 Block->print(O, "", SlotTracker);
1082 }
1083
1084 O << "}\n";
1085}
1086
1087std::string VPlan::getName() const {
1088 std::string Out;
1089 raw_string_ostream RSO(Out);
1090 RSO << Name << " for ";
1091 if (!VFs.empty()) {
1092 RSO << "VF={" << VFs[0];
1093 for (ElementCount VF : drop_begin(VFs))
1094 RSO << "," << VF;
1095 RSO << "},";
1096 }
1097
1098 if (UFs.empty()) {
1099 RSO << "UF>=1";
1100 } else {
1101 RSO << "UF={" << UFs[0];
1102 for (unsigned UF : drop_begin(UFs))
1103 RSO << "," << UF;
1104 RSO << "}";
1105 }
1106
1107 return Out;
1108}
1109
1112 VPlanPrinter Printer(O, *this);
1113 Printer.dump();
1114}
1115
1117void VPlan::dump() const { print(dbgs()); }
1118#endif
1119
1120static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1121 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1122 // Update the operands of all cloned recipes starting at NewEntry. This
1123 // traverses all reachable blocks. This is done in two steps, to handle cycles
1124 // in PHI recipes.
1126 OldDeepRPOT(Entry);
1128 NewDeepRPOT(NewEntry);
1129 // First, collect all mappings from old to new VPValues defined by cloned
1130 // recipes.
1131 for (const auto &[OldBB, NewBB] :
1134 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1135 "blocks must have the same number of recipes");
1136 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1137 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1138 "recipes must have the same number of operands");
1139 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1140 "recipes must define the same number of operands");
1141 for (const auto &[OldV, NewV] :
1142 zip(OldR.definedValues(), NewR.definedValues()))
1143 Old2NewVPValues[OldV] = NewV;
1144 }
1145 }
1146
1147 // Update all operands to use cloned VPValues.
1148 for (VPBasicBlock *NewBB :
1150 for (VPRecipeBase &NewR : *NewBB)
1151 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1152 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1153 NewR.setOperand(I, NewOp);
1154 }
1155 }
1156}
1157
1159 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1160 // Clone blocks.
1161 const auto &[NewEntry, __] = cloneFrom(Entry);
1162
1163 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1164 VPIRBasicBlock *NewScalarHeader = nullptr;
1165 if (getScalarHeader()->hasPredecessors()) {
1166 NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
1167 vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
1168 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1169 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1170 }));
1171 } else {
1172 NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
1173 }
1174 // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1175 auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1176 DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1177 for (VPValue *OldLiveIn : getLiveIns()) {
1178 Old2NewVPValues[OldLiveIn] =
1179 NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
1180 }
1181 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1182 Old2NewVPValues[&VF] = &NewPlan->VF;
1183 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1184 if (BackedgeTakenCount) {
1185 NewPlan->BackedgeTakenCount = new VPValue();
1186 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1187 }
1188 if (TripCount && TripCount->isLiveIn())
1189 Old2NewVPValues[TripCount] =
1190 NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue());
1191 // else NewTripCount will be created and inserted into Old2NewVPValues when
1192 // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1193
1194 remapOperands(Entry, NewEntry, Old2NewVPValues);
1195
1196 // Initialize remaining fields of cloned VPlan.
1197 NewPlan->VFs = VFs;
1198 NewPlan->UFs = UFs;
1199 // TODO: Adjust names.
1200 NewPlan->Name = Name;
1201 if (TripCount) {
1202 assert(Old2NewVPValues.contains(TripCount) &&
1203 "TripCount must have been added to Old2NewVPValues");
1204 NewPlan->TripCount = Old2NewVPValues[TripCount];
1205 }
1206
1207 // Transfer all cloned blocks (the second half of all current blocks) from
1208 // current to new VPlan.
1209 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1210 for (unsigned I :
1211 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1212 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
1213 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1214
1215 // Update ExitBlocks of the new plan.
1216 for (VPBlockBase *VPB : NewPlan->CreatedBlocks) {
1217 if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(VPB) &&
1218 VPB != NewScalarHeader)
1219 NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
1220 }
1221
1222 return NewPlan;
1223}
1224
1226 auto *VPIRBB = new VPIRBasicBlock(IRBB);
1227 CreatedBlocks.push_back(VPIRBB);
1228 return VPIRBB;
1229}
1230
1232 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1233 for (Instruction &I :
1234 make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
1235 VPIRBB->appendRecipe(VPIRInstruction::create(I));
1236 return VPIRBB;
1237}
1238
1239#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1240
1241Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1242 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1243 Twine(getOrCreateBID(Block));
1244}
1245
1246Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1247 const std::string &Name = Block->getName();
1248 if (!Name.empty())
1249 return Name;
1250 return "VPB" + Twine(getOrCreateBID(Block));
1251}
1252
1254 Depth = 1;
1255 bumpIndent(0);
1256 OS << "digraph VPlan {\n";
1257 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1258 if (!Plan.getName().empty())
1259 OS << "\\n" << DOT::EscapeString(Plan.getName());
1260
1261 {
1262 // Print live-ins.
1263 std::string Str;
1264 raw_string_ostream SS(Str);
1265 Plan.printLiveIns(SS);
1267 StringRef(Str).rtrim('\n').split(Lines, "\n");
1268 for (auto Line : Lines)
1269 OS << DOT::EscapeString(Line.str()) << "\\n";
1270 }
1271
1272 OS << "\"]\n";
1273 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1274 OS << "edge [fontname=Courier, fontsize=30]\n";
1275 OS << "compound=true\n";
1276
1277 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1278 dumpBlock(Block);
1279
1280 OS << "}\n";
1281}
1282
1283void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1285 dumpBasicBlock(BasicBlock);
1287 dumpRegion(Region);
1288 else
1289 llvm_unreachable("Unsupported kind of VPBlock.");
1290}
1291
1292void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1293 bool Hidden, const Twine &Label) {
1294 // Due to "dot" we print an edge between two regions as an edge between the
1295 // exiting basic block and the entry basic of the respective regions.
1296 const VPBlockBase *Tail = From->getExitingBasicBlock();
1297 const VPBlockBase *Head = To->getEntryBasicBlock();
1298 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1299 OS << " [ label=\"" << Label << '\"';
1300 if (Tail != From)
1301 OS << " ltail=" << getUID(From);
1302 if (Head != To)
1303 OS << " lhead=" << getUID(To);
1304 if (Hidden)
1305 OS << "; splines=none";
1306 OS << "]\n";
1307}
1308
1309void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1310 auto &Successors = Block->getSuccessors();
1311 if (Successors.size() == 1)
1312 drawEdge(Block, Successors.front(), false, "");
1313 else if (Successors.size() == 2) {
1314 drawEdge(Block, Successors.front(), false, "T");
1315 drawEdge(Block, Successors.back(), false, "F");
1316 } else {
1317 unsigned SuccessorNumber = 0;
1318 for (auto *Successor : Successors)
1319 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1320 }
1321}
1322
1323void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1324 // Implement dot-formatted dump by performing plain-text dump into the
1325 // temporary storage followed by some post-processing.
1326 OS << Indent << getUID(BasicBlock) << " [label =\n";
1327 bumpIndent(1);
1328 std::string Str;
1329 raw_string_ostream SS(Str);
1330 // Use no indentation as we need to wrap the lines into quotes ourselves.
1331 BasicBlock->print(SS, "", SlotTracker);
1332
1333 // We need to process each line of the output separately, so split
1334 // single-string plain-text dump.
1336 StringRef(Str).rtrim('\n').split(Lines, "\n");
1337
1338 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1339 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1340 };
1341
1342 // Don't need the "+" after the last line.
1343 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1344 EmitLine(Line, " +\n");
1345 EmitLine(Lines.back(), "\n");
1346
1347 bumpIndent(-1);
1348 OS << Indent << "]\n";
1349
1350 dumpEdges(BasicBlock);
1351}
1352
1353void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1354 OS << Indent << "subgraph " << getUID(Region) << " {\n";
1355 bumpIndent(1);
1356 OS << Indent << "fontname=Courier\n"
1357 << Indent << "label=\""
1358 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1359 << DOT::EscapeString(Region->getName()) << "\"\n";
1360 // Dump the blocks of the region.
1361 assert(Region->getEntry() && "Region contains no inner blocks.");
1362 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1363 dumpBlock(Block);
1364 bumpIndent(-1);
1365 OS << Indent << "}\n";
1366 dumpEdges(Region);
1367}
1368
1369#endif
1370
1371/// Returns true if there is a vector loop region and \p VPV is defined in a
1372/// loop region.
1373static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1374 const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1375 return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1377}
1378
1383 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1384}
1385
1387 VPValue *New,
1388 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1389 // Note that this early exit is required for correctness; the implementation
1390 // below relies on the number of users for this VPValue to decrease, which
1391 // isn't the case if this == New.
1392 if (this == New)
1393 return;
1394
1395 for (unsigned J = 0; J < getNumUsers();) {
1396 VPUser *User = Users[J];
1397 bool RemovedUser = false;
1398 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1399 if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1400 continue;
1401
1402 RemovedUser = true;
1403 User->setOperand(I, New);
1404 }
1405 // If a user got removed after updating the current user, the next user to
1406 // update will be moved to the current position, so we only need to
1407 // increment the index if the number of users did not change.
1408 if (!RemovedUser)
1409 J++;
1410 }
1411}
1412
1414 for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
1415 if (getOperand(Idx) == From)
1416 setOperand(Idx, To);
1417 }
1418}
1419
1420#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1422 OS << Tracker.getOrCreateName(this);
1423}
1424
1427 Op->printAsOperand(O, SlotTracker);
1428 });
1429}
1430#endif
1431
1432void VPSlotTracker::assignName(const VPValue *V) {
1433 assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1434 auto *UV = V->getUnderlyingValue();
1435 auto *VPI = dyn_cast_or_null<VPInstruction>(V->getDefiningRecipe());
1436 if (!UV && !(VPI && !VPI->getName().empty())) {
1437 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1438 NextSlot++;
1439 return;
1440 }
1441
1442 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1443 // appending ".Number" to the name if there are multiple uses.
1444 std::string Name;
1445 if (UV)
1446 Name = getName(UV);
1447 else
1448 Name = VPI->getName();
1449
1450 assert(!Name.empty() && "Name cannot be empty.");
1451 StringRef Prefix = UV ? "ir<" : "vp<%";
1452 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1453
1454 // First assign the base name for V.
1455 const auto &[A, _] = VPValue2Name.try_emplace(V, BaseName);
1456 // Integer or FP constants with different types will result in he same string
1457 // due to stripping types.
1458 if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV))
1459 return;
1460
1461 // If it is already used by C > 0 other VPValues, increase the version counter
1462 // C and use it for V.
1463 const auto &[C, UseInserted] = BaseName2Version.try_emplace(BaseName, 0);
1464 if (!UseInserted) {
1465 C->second++;
1466 A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1467 }
1468}
1469
1470void VPSlotTracker::assignNames(const VPlan &Plan) {
1471 if (Plan.VF.getNumUsers() > 0)
1472 assignName(&Plan.VF);
1473 if (Plan.VFxUF.getNumUsers() > 0)
1474 assignName(&Plan.VFxUF);
1475 assignName(&Plan.VectorTripCount);
1476 if (Plan.BackedgeTakenCount)
1477 assignName(Plan.BackedgeTakenCount);
1478 for (VPValue *LI : Plan.getLiveIns())
1479 assignName(LI);
1480
1481 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1482 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1483 for (const VPBasicBlock *VPBB :
1485 assignNames(VPBB);
1486}
1487
1488void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1489 for (const VPRecipeBase &Recipe : *VPBB)
1490 for (VPValue *Def : Recipe.definedValues())
1491 assignName(Def);
1492}
1493
1494std::string VPSlotTracker::getName(const Value *V) {
1495 std::string Name;
1496 raw_string_ostream S(Name);
1497 if (V->hasName() || !isa<Instruction>(V)) {
1498 V->printAsOperand(S, false);
1499 return Name;
1500 }
1501
1502 if (!MST) {
1503 // Lazily create the ModuleSlotTracker when we first hit an unnamed
1504 // instruction.
1505 auto *I = cast<Instruction>(V);
1506 // This check is required to support unit tests with incomplete IR.
1507 if (I->getParent()) {
1508 MST = std::make_unique<ModuleSlotTracker>(I->getModule());
1509 MST->incorporateFunction(*I->getFunction());
1510 } else {
1511 MST = std::make_unique<ModuleSlotTracker>(nullptr);
1512 }
1513 }
1514 V->printAsOperand(S, false, *MST);
1515 return Name;
1516}
1517
1518std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1519 std::string Name = VPValue2Name.lookup(V);
1520 if (!Name.empty())
1521 return Name;
1522
1523 // If no name was assigned, no VPlan was provided when creating the slot
1524 // tracker or it is not reachable from the provided VPlan. This can happen,
1525 // e.g. when trying to print a recipe that has not been inserted into a VPlan
1526 // in a debugger.
1527 // TODO: Update VPSlotTracker constructor to assign names to recipes &
1528 // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1529 // here.
1530 const VPRecipeBase *DefR = V->getDefiningRecipe();
1531 (void)DefR;
1532 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1533 "VPValue defined by a recipe in a VPlan?");
1534
1535 // Use the underlying value's name, if there is one.
1536 if (auto *UV = V->getUnderlyingValue()) {
1537 std::string Name;
1538 raw_string_ostream S(Name);
1539 UV->printAsOperand(S, false);
1540 return (Twine("ir<") + Name + ">").str();
1541 }
1542
1543 return "<badref>";
1544}
1545
1547 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1548 assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1549 bool PredicateAtRangeStart = Predicate(Range.Start);
1550
1551 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1552 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1553 Range.End = TmpVF;
1554 break;
1555 }
1556
1557 return PredicateAtRangeStart;
1558}
1559
1560/// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF,
1561/// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range
1562/// of VF's starting at a given VF and extending it as much as possible. Each
1563/// vectorization decision can potentially shorten this sub-range during
1564/// buildVPlan().
1566 ElementCount MaxVF) {
1567 auto MaxVFTimes2 = MaxVF * 2;
1568 for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
1569 VFRange SubRange = {VF, MaxVFTimes2};
1570 if (auto Plan = tryToBuildVPlan(SubRange)) {
1572 // Update the name of the latch of the top-level vector loop region region
1573 // after optimizations which includes block folding.
1574 Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
1575 VPlans.push_back(std::move(Plan));
1576 }
1577 VF = SubRange.End;
1578 }
1579}
1580
1582 assert(count_if(VPlans,
1583 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1584 1 &&
1585 "Multiple VPlans for VF.");
1586
1587 for (const VPlanPtr &Plan : VPlans) {
1588 if (Plan->hasVF(VF))
1589 return *Plan.get();
1590 }
1591 llvm_unreachable("No plan found!");
1592}
1593
1596 // Reserve first location for self reference to the LoopID metadata node.
1597 MDs.push_back(nullptr);
1598 bool IsUnrollMetadata = false;
1599 MDNode *LoopID = L->getLoopID();
1600 if (LoopID) {
1601 // First find existing loop unrolling disable metadata.
1602 for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) {
1603 auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
1604 if (MD) {
1605 const auto *S = dyn_cast<MDString>(MD->getOperand(0));
1606 if (!S)
1607 continue;
1608 if (S->getString().starts_with("llvm.loop.unroll.runtime.disable"))
1609 continue;
1610 IsUnrollMetadata =
1611 S->getString().starts_with("llvm.loop.unroll.disable");
1612 }
1613 MDs.push_back(LoopID->getOperand(I));
1614 }
1615 }
1616
1617 if (!IsUnrollMetadata) {
1618 // Add runtime unroll disable metadata.
1619 LLVMContext &Context = L->getHeader()->getContext();
1620 SmallVector<Metadata *, 1> DisableOperands;
1621 DisableOperands.push_back(
1622 MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
1623 MDNode *DisableNode = MDNode::get(Context, DisableOperands);
1624 MDs.push_back(DisableNode);
1625 MDNode *NewLoopID = MDNode::get(Context, MDs);
1626 // Set operand 0 to refer to the loop id itself.
1627 NewLoopID->replaceOperandWith(0, NewLoopID);
1628 L->setLoopID(NewLoopID);
1629 }
1630}
1631
1633 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
1634 bool VectorizingEpilogue, MDNode *OrigLoopID,
1635 std::optional<unsigned> OrigAverageTripCount,
1636 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
1637 bool DisableRuntimeUnroll) {
1638 // Update the metadata of the scalar loop. Skip the update when vectorizing
1639 // the epilogue loop to ensure it is updated only once. Also skip the update
1640 // when the scalar loop became unreachable.
1641 if (Plan.getScalarPreheader()->hasPredecessors() && !VectorizingEpilogue) {
1642 std::optional<MDNode *> RemainderLoopID =
1645 if (RemainderLoopID) {
1646 OrigLoop->setLoopID(*RemainderLoopID);
1647 } else {
1648 if (DisableRuntimeUnroll)
1650
1651 LoopVectorizeHints Hints(OrigLoop, true, *ORE);
1652 Hints.setAlreadyVectorized();
1653 }
1654 }
1655
1656 if (!VectorLoop)
1657 return;
1658
1659 if (std::optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(
1660 OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1662 VectorLoop->setLoopID(*VectorizedLoopID);
1663 } else {
1664 // Keep all loop hints from the original loop on the vector loop (we'll
1665 // replace the vectorizer-specific hints below).
1666 if (OrigLoopID)
1667 VectorLoop->setLoopID(OrigLoopID);
1668
1669 if (!VectorizingEpilogue) {
1670 LoopVectorizeHints Hints(VectorLoop, true, *ORE);
1671 Hints.setAlreadyVectorized();
1672 }
1673 }
1675 TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
1676 if (!UP.UnrollVectorizedLoop || VectorizingEpilogue)
1678
1679 // Set/update profile weights for the vector and remainder loops as original
1680 // loop iterations are now distributed among them. Note that original loop
1681 // becomes the scalar remainder loop after vectorization.
1682 //
1683 // For cases like foldTailByMasking() and requiresScalarEpiloque() we may
1684 // end up getting slightly roughened result but that should be OK since
1685 // profile is not inherently precise anyway. Note also possible bypass of
1686 // vector code caused by legality checks is ignored, assigning all the weight
1687 // to the vector loop, optimistically.
1688 //
1689 // For scalable vectorization we can't know at compile time how many
1690 // iterations of the loop are handled in one vector iteration, so instead
1691 // use the value of vscale used for tuning.
1692 if (!OrigAverageTripCount)
1693 return;
1694 // Calculate number of iterations in unrolled loop.
1695 unsigned AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
1696 // Calculate number of iterations for remainder loop.
1697 unsigned RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
1698
1699 if (HeaderVPBB) {
1700 setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
1701 OrigLoopInvocationWeight);
1702 }
1703 if (Plan.getScalarPreheader()->hasPredecessors()) {
1704 setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
1705 OrigLoopInvocationWeight);
1706 }
1707}
1708
1709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1711 if (VPlans.empty()) {
1712 O << "LV: No VPlans built.\n";
1713 return;
1714 }
1715 for (const auto &Plan : VPlans)
1717 Plan->printDOT(O);
1718 else
1719 Plan->print(O);
1720}
1721#endif
1722
1723bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
1725 APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
1726 unsigned WideSize = C->getBitWidth();
1727 APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
1728 ? TruncatedVal.sext(WideSize)
1729 : TruncatedVal.zext(WideSize);
1730 return ExtendedVal == *C;
1731}
1732
1735 if (!V->isLiveIn())
1736 return {};
1737
1738 return TTI::getOperandInfo(V->getLiveInIRValue());
1739}
1740
1742 Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
1743 bool AlwaysIncludeReplicatingR) {
1744 if (VF.isScalar())
1745 return 0;
1746
1747 assert(!VF.isScalable() &&
1748 "Scalarization overhead not supported for scalable vectors");
1749
1750 InstructionCost ScalarizationCost = 0;
1751 // Compute the cost of scalarizing the result if needed.
1752 if (!ResultTy->isVoidTy()) {
1753 for (Type *VectorTy :
1754 to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) {
1755 ScalarizationCost += TTI.getScalarizationOverhead(
1757 /*Insert=*/true,
1758 /*Extract=*/false, CostKind);
1759 }
1760 }
1761 // Compute the cost of scalarizing the operands, skipping ones that do not
1762 // require extraction/scalarization and do not incur any overhead.
1763 SmallPtrSet<const VPValue *, 4> UniqueOperands;
1765 for (auto *Op : Operands) {
1766 if (Op->isLiveIn() ||
1767 (!AlwaysIncludeReplicatingR &&
1770 cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
1771 !UniqueOperands.insert(Op).second)
1772 continue;
1773 Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
1774 }
1775 return ScalarizationCost +
1776 TTI.getOperandsScalarizationOverhead(Tys, CostKind);
1777}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
dxil pretty DXIL Metadata Pretty Printer
Flatten the CFG
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
#define I(x, y, z)
Definition MD5.cpp:58
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file provides utility VPlan to VPlan transformations.
static void addRuntimeUnrollDisableMetaData(Loop *L)
Definition VPlan.cpp:1594
static T * getPlanEntry(T *Start)
Definition VPlan.cpp:145
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
Definition VPlan.cpp:572
const char LLVMLoopVectorizeFollowupAll[]
Definition VPlan.cpp:61
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
Definition VPlan.cpp:1373
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition VPlan.cpp:590
const char LLVMLoopVectorizeFollowupVectorized[]
Definition VPlan.cpp:62
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1120
const char LLVMLoopVectorizeFollowupEpilogue[]
Definition VPlan.cpp:64
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
Definition VPlan.cpp:682
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition DebugLoc.h:124
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:325
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:321
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
static InstructionCost getInvalid(CostType Val=0)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A helper class to return the specified delimiter string after the first invocation of operator String...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1581
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1632
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1565
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1546
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1710
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition LoopInfo.cpp:526
Metadata node.
Definition Metadata.h:1078
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1442
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1448
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:608
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
void insert_range(Range &&R)
Definition SetVector.h:175
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition StringMap.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition StringRef.h:804
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3819
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:3894
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3846
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:482
iterator end()
Definition VPlan.h:3856
iterator begin()
Recipe iterator methods.
Definition VPlan.h:3854
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:533
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition VPlan.cpp:767
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:774
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:220
void connectToPredecessors(VPTransformState &State)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition VPlan.cpp:382
VPRegionBlock * getEnclosingLoopRegion()
Definition VPlan.cpp:582
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:554
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3834
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition VPlan.cpp:540
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition VPlan.cpp:661
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition VPlan.cpp:639
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:627
const VPRecipeBase & back() const
Definition VPlan.h:3868
bool empty() const
Definition VPlan.h:3865
size_t size() const
Definition VPlan.h:3864
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:80
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:299
VPRegionBlock * getParent()
Definition VPlan.h:172
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:190
void setName(const Twine &newName)
Definition VPlan.h:165
size_t getNumSuccessors() const
Definition VPlan.h:218
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:200
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:222
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:649
size_t getNumPredecessors() const
Definition VPlan.h:219
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:290
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:212
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:203
VPlan * getPlan()
Definition VPlan.cpp:165
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:184
const std::string & getName() const
Definition VPlan.h:163
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:214
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:241
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:149
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:204
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:170
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:263
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:208
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:197
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:187
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:90
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:146
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:165
VPlan-based builder utility analogous to IRBuilder.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void dump() const
Dump the VPDef to stderr (for debugging).
Definition VPlan.cpp:126
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPDef prints itself.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:3972
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:450
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:3996
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:475
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition VPlan.cpp:85
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:386
VPBasicBlock * getParent()
Definition VPlan.h:407
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4007
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:732
const VPBlockBase * getEntry() const
Definition VPlan.h:4043
void dissolveToCFGLoop()
Remove the current region from its VPlan, connecting its predecessor to its entry,...
Definition VPlan.cpp:836
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4075
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition VPlan.cpp:793
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:822
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition VPlan.cpp:745
const VPBlockBase * getExiting() const
Definition VPlan.h:4055
friend class VPlan
Definition VPlan.h:4008
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
Definition VPlan.cpp:1518
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:199
void replaceUsesOfWith(VPValue *From, VPValue *To)
Replaces all uses of From in the VPUser with To.
Definition VPlan.cpp:1413
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1425
operand_range operands()
Definition VPlanValue.h:267
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:243
unsigned getNumOperands() const
Definition VPlanValue.h:237
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:238
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:48
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1379
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:135
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1421
friend class VPDef
Definition VPlanValue.h:49
Value * UnderlyingVal
Hold the underlying Value, if any, attached to this VPValue.
Definition VPlanValue.h:61
void dump() const
Dump the value to stderr (for debugging).
Definition VPlan.cpp:118
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:98
virtual ~VPValue()
Definition VPlan.cpp:104
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:111
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1382
unsigned getNumUsers() const
Definition VPlanValue.h:113
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1386
VPDef * Def
Pointer to the VPDef that defines this VPValue.
Definition VPlanValue.h:65
LLVM_DUMP_METHOD void dump()
Definition VPlan.cpp:1253
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4137
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1111
friend class VPSlotTracker
Definition VPlan.h:4139
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1087
VPBasicBlock * getEntry()
Definition VPlan.h:4231
void setName(const Twine &newName)
Definition VPlan.h:4381
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:895
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:872
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:903
friend class VPlanPrinter
Definition VPlan.h:4138
unsigned getUF() const
Definition VPlan.h:4363
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1225
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4283
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1016
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:998
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4220
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4455
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1231
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1117
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4274
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:910
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4425
void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1070
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4279
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1032
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1158
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:217
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::string EscapeString(const std::string &Label)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
bool match(Val *V, const Pattern &P)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2231
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
auto cast_or_null(const Y &Val)
Definition Casting.h:714
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:216
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI cl::opt< bool > EnableFSDiscriminator
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
Definition VPlan.cpp:1723
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
cl::opt< bool > EnableVPlanNativePath
Definition VPlan.cpp:56
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:76
Parameters that control the generic loop unrolling transformation.
bool UnrollVectorizedLoop
Don't disable runtime unroll for the loops which were vectorized.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
ElementCount End
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition VPlan.cpp:1734
InstructionCost getScalarizationOverhead(Type *ResultTy, ArrayRef< const VPValue * > Operands, ElementCount VF, bool AlwaysIncludeReplicatingR=false)
Estimate the overhead of scalarizing a recipe with result type ResultTy and Operands with VF.
Definition VPlan.cpp:1741
TargetTransformInfo::TargetCostKind CostKind
VPTypeAnalysis Types
const TargetTransformInfo & TTI
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
struct llvm::VPTransformState::DataState Data
struct llvm::VPTransformState::CFGState CFG
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:267
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy)
Definition VPlan.cpp:227
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
bool hasScalarValue(const VPValue *Def, VPLane Lane)
const TargetTransformInfo * TTI
Target Transform Info.
VPlan * Plan
Pointer to the VPlan code is generated for.
void set(const VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
bool hasVectorValue(const VPValue *Def)
VPDominatorTree VPDT
VPlan-based dominator tree.
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Value * packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue, const VPLane &Lane)
Insert the scalar value of Def at Lane into Lane of WideValue and return the resulting value.
Definition VPlan.cpp:350
AssumptionCache * AC
Hold a pointer to AssumptionCache to register new assumptions after replicating assume calls.
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition VPlan.cpp:328
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
static void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...