LLVM 22.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/FMF.h"
38#include "llvm/IR/Operator.h"
41#include <cassert>
42#include <cstddef>
43#include <functional>
44#include <string>
45#include <utility>
46
47namespace llvm {
48
49class BasicBlock;
50class DominatorTree;
52class IRBuilderBase;
53struct VPTransformState;
54class raw_ostream;
56class SCEV;
57class Type;
58class VPBasicBlock;
59class VPBuilder;
60class VPDominatorTree;
61class VPRegionBlock;
62class VPlan;
63class VPLane;
65class VPlanSlp;
66class Value;
68class LoopVersioning;
69
70struct VPCostContext;
71
72namespace Intrinsic {
73typedef unsigned ID;
74}
75
76using VPlanPtr = std::unique_ptr<VPlan>;
77
78/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
79/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
81 friend class VPBlockUtils;
82
83 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
84
85 /// An optional name for the block.
86 std::string Name;
87
88 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
89 /// it is a topmost VPBlockBase.
90 VPRegionBlock *Parent = nullptr;
91
92 /// List of predecessor blocks.
94
95 /// List of successor blocks.
97
98 /// VPlan containing the block. Can only be set on the entry block of the
99 /// plan.
100 VPlan *Plan = nullptr;
101
102 /// Add \p Successor as the last successor to this block.
103 void appendSuccessor(VPBlockBase *Successor) {
104 assert(Successor && "Cannot add nullptr successor!");
105 Successors.push_back(Successor);
106 }
107
108 /// Add \p Predecessor as the last predecessor to this block.
109 void appendPredecessor(VPBlockBase *Predecessor) {
110 assert(Predecessor && "Cannot add nullptr predecessor!");
111 Predecessors.push_back(Predecessor);
112 }
113
114 /// Remove \p Predecessor from the predecessors of this block.
115 void removePredecessor(VPBlockBase *Predecessor) {
116 auto Pos = find(Predecessors, Predecessor);
117 assert(Pos && "Predecessor does not exist");
118 Predecessors.erase(Pos);
119 }
120
121 /// Remove \p Successor from the successors of this block.
122 void removeSuccessor(VPBlockBase *Successor) {
123 auto Pos = find(Successors, Successor);
124 assert(Pos && "Successor does not exist");
125 Successors.erase(Pos);
126 }
127
128 /// This function replaces one predecessor with another, useful when
129 /// trying to replace an old block in the CFG with a new one.
130 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
131 auto I = find(Predecessors, Old);
132 assert(I != Predecessors.end());
133 assert(Old->getParent() == New->getParent() &&
134 "replaced predecessor must have the same parent");
135 *I = New;
136 }
137
138 /// This function replaces one successor with another, useful when
139 /// trying to replace an old block in the CFG with a new one.
140 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
141 auto I = find(Successors, Old);
142 assert(I != Successors.end());
143 assert(Old->getParent() == New->getParent() &&
144 "replaced successor must have the same parent");
145 *I = New;
146 }
147
148protected:
149 VPBlockBase(const unsigned char SC, const std::string &N)
150 : SubclassID(SC), Name(N) {}
151
152public:
153 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
154 /// that are actually instantiated. Values of this enumeration are kept in the
155 /// SubclassID field of the VPBlockBase objects. They are used for concrete
156 /// type identification.
157 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
158
160
161 virtual ~VPBlockBase() = default;
162
163 const std::string &getName() const { return Name; }
164
165 void setName(const Twine &newName) { Name = newName.str(); }
166
167 /// \return an ID for the concrete type of this object.
168 /// This is used to implement the classof checks. This should not be used
169 /// for any other purpose, as the values may change as LLVM evolves.
170 unsigned getVPBlockID() const { return SubclassID; }
171
172 VPRegionBlock *getParent() { return Parent; }
173 const VPRegionBlock *getParent() const { return Parent; }
174
175 /// \return A pointer to the plan containing the current block.
176 VPlan *getPlan();
177 const VPlan *getPlan() const;
178
179 /// Sets the pointer of the plan containing the block. The block must be the
180 /// entry block into the VPlan.
181 void setPlan(VPlan *ParentPlan);
182
183 void setParent(VPRegionBlock *P) { Parent = P; }
184
185 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
186 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
187 /// VPBlockBase is a VPBasicBlock, it is returned.
188 const VPBasicBlock *getEntryBasicBlock() const;
189 VPBasicBlock *getEntryBasicBlock();
190
191 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
192 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
193 /// VPBlockBase is a VPBasicBlock, it is returned.
194 const VPBasicBlock *getExitingBasicBlock() const;
195 VPBasicBlock *getExitingBasicBlock();
196
197 const VPBlocksTy &getSuccessors() const { return Successors; }
198 VPBlocksTy &getSuccessors() { return Successors; }
199
202
203 const VPBlocksTy &getPredecessors() const { return Predecessors; }
204 VPBlocksTy &getPredecessors() { return Predecessors; }
205
206 /// \return the successor of this VPBlockBase if it has a single successor.
207 /// Otherwise return a null pointer.
209 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
210 }
211
212 /// \return the predecessor of this VPBlockBase if it has a single
213 /// predecessor. Otherwise return a null pointer.
215 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
216 }
217
218 size_t getNumSuccessors() const { return Successors.size(); }
219 size_t getNumPredecessors() const { return Predecessors.size(); }
220
221 /// Returns true if this block has any predecessors.
222 bool hasPredecessors() const { return !Predecessors.empty(); }
223
224 /// An Enclosing Block of a block B is any block containing B, including B
225 /// itself. \return the closest enclosing block starting from "this", which
226 /// has successors. \return the root enclosing block if all enclosing blocks
227 /// have no successors.
228 VPBlockBase *getEnclosingBlockWithSuccessors();
229
230 /// \return the closest enclosing block starting from "this", which has
231 /// predecessors. \return the root enclosing block if all enclosing blocks
232 /// have no predecessors.
233 VPBlockBase *getEnclosingBlockWithPredecessors();
234
235 /// \return the successors either attached directly to this VPBlockBase or, if
236 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
237 /// successors of its own, search recursively for the first enclosing
238 /// VPRegionBlock that has successors and return them. If no such
239 /// VPRegionBlock exists, return the (empty) successors of the topmost
240 /// VPBlockBase reached.
242 return getEnclosingBlockWithSuccessors()->getSuccessors();
243 }
244
245 /// \return the hierarchical successor of this VPBlockBase if it has a single
246 /// hierarchical successor. Otherwise return a null pointer.
248 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
249 }
250
251 /// \return the predecessors either attached directly to this VPBlockBase or,
252 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
253 /// predecessors of its own, search recursively for the first enclosing
254 /// VPRegionBlock that has predecessors and return them. If no such
255 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
256 /// VPBlockBase reached.
258 return getEnclosingBlockWithPredecessors()->getPredecessors();
259 }
260
261 /// \return the hierarchical predecessor of this VPBlockBase if it has a
262 /// single hierarchical predecessor. Otherwise return a null pointer.
266
267 /// Set a given VPBlockBase \p Successor as the single successor of this
268 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
269 /// This VPBlockBase must have no successors.
271 assert(Successors.empty() && "Setting one successor when others exist.");
272 assert(Successor->getParent() == getParent() &&
273 "connected blocks must have the same parent");
274 appendSuccessor(Successor);
275 }
276
277 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
278 /// successors of this VPBlockBase. This VPBlockBase is not added as
279 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
280 /// successors.
281 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
282 assert(Successors.empty() && "Setting two successors when others exist.");
283 appendSuccessor(IfTrue);
284 appendSuccessor(IfFalse);
285 }
286
287 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
288 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
289 /// as successor of any VPBasicBlock in \p NewPreds.
291 assert(Predecessors.empty() && "Block predecessors already set.");
292 for (auto *Pred : NewPreds)
293 appendPredecessor(Pred);
294 }
295
296 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
297 /// This VPBlockBase must have no successors. This VPBlockBase is not added
298 /// as predecessor of any VPBasicBlock in \p NewSuccs.
300 assert(Successors.empty() && "Block successors already set.");
301 for (auto *Succ : NewSuccs)
302 appendSuccessor(Succ);
303 }
304
305 /// Remove all the predecessors of this block.
306 void clearPredecessors() { Predecessors.clear(); }
307
308 /// Remove all the successors of this block.
309 void clearSuccessors() { Successors.clear(); }
310
311 /// Swap predecessors of the block. The block must have exactly 2
312 /// predecessors.
314 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
315 std::swap(Predecessors[0], Predecessors[1]);
316 }
317
318 /// Swap successors of the block. The block must have exactly 2 successors.
319 // TODO: This should be part of introducing conditional branch recipes rather
320 // than being independent.
322 assert(Successors.size() == 2 && "must have 2 successors to swap");
323 std::swap(Successors[0], Successors[1]);
324 }
325
326 /// Returns the index for \p Pred in the blocks predecessors list.
327 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
328 assert(count(Predecessors, Pred) == 1 &&
329 "must have Pred exactly once in Predecessors");
330 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
331 }
332
333 /// Returns the index for \p Succ in the blocks successor list.
334 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
335 assert(count(Successors, Succ) == 1 &&
336 "must have Succ exactly once in Successors");
337 return std::distance(Successors.begin(), find(Successors, Succ));
338 }
339
340 /// The method which generates the output IR that correspond to this
341 /// VPBlockBase, thereby "executing" the VPlan.
342 virtual void execute(VPTransformState *State) = 0;
343
344 /// Return the cost of the block.
346
347#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
348 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
349 OS << getName();
350 }
351
352 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
353 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
354 /// consecutive numbers.
355 ///
356 /// Note that the numbering is applied to the whole VPlan, so printing
357 /// individual blocks is consistent with the whole VPlan printing.
358 virtual void print(raw_ostream &O, const Twine &Indent,
359 VPSlotTracker &SlotTracker) const = 0;
360
361 /// Print plain-text dump of this VPlan to \p O.
362 void print(raw_ostream &O) const;
363
364 /// Print the successors of this block to \p O, prefixing all lines with \p
365 /// Indent.
366 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
367
368 /// Dump this VPBlockBase to dbgs().
369 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
370#endif
371
372 /// Clone the current block and its recipes without updating the operands of
373 /// the cloned recipes, including all blocks in the single-entry single-exit
374 /// region for VPRegionBlocks.
375 virtual VPBlockBase *clone() = 0;
376};
377
378/// VPRecipeBase is a base class modeling a sequence of one or more output IR
379/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
380/// and is responsible for deleting its defined values. Single-value
381 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
382/// VPRecipeBase and VPValue separately.
384 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
385 public VPDef,
386 public VPUser {
387 friend VPBasicBlock;
388 friend class VPBlockUtils;
389
390 /// Each VPRecipe belongs to a single VPBasicBlock.
391 VPBasicBlock *Parent = nullptr;
392
393 /// The debug location for the recipe.
394 DebugLoc DL;
395
396public:
397 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
399 : VPDef(SC), VPUser(Operands), DL(DL) {}
400
401 ~VPRecipeBase() override = default;
402
403 /// Clone the current recipe.
404 virtual VPRecipeBase *clone() = 0;
405
406 /// \return the VPBasicBlock which this VPRecipe belongs to.
407 VPBasicBlock *getParent() { return Parent; }
408 const VPBasicBlock *getParent() const { return Parent; }
409
410 /// \return the VPRegionBlock which the recipe belongs to.
411 VPRegionBlock *getRegion();
412 const VPRegionBlock *getRegion() const;
413
414 /// The method which generates the output IR instructions that correspond to
415 /// this VPRecipe, thereby "executing" the VPlan.
416 virtual void execute(VPTransformState &State) = 0;
417
418 /// Return the cost of this recipe, taking into account if the cost
419 /// computation should be skipped and the ForceTargetInstructionCost flag.
420 /// Also takes care of printing the cost for debugging.
422
423 /// Insert an unlinked recipe into a basic block immediately before
424 /// the specified recipe.
425 void insertBefore(VPRecipeBase *InsertPos);
426 /// Insert an unlinked recipe into \p BB immediately before the insertion
427 /// point \p IP;
428 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
429
430 /// Insert an unlinked Recipe into a basic block immediately after
431 /// the specified Recipe.
432 void insertAfter(VPRecipeBase *InsertPos);
433
434 /// Unlink this recipe from its current VPBasicBlock and insert it into
435 /// the VPBasicBlock that MovePos lives in, right after MovePos.
436 void moveAfter(VPRecipeBase *MovePos);
437
438 /// Unlink this recipe and insert into BB before I.
439 ///
440 /// \pre I is a valid iterator into BB.
441 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
442
443 /// This method unlinks 'this' from the containing basic block, but does not
444 /// delete it.
445 void removeFromParent();
446
447 /// This method unlinks 'this' from the containing basic block and deletes it.
448 ///
449 /// \returns an iterator pointing to the element after the erased one
451
452 /// Method to support type inquiry through isa, cast, and dyn_cast.
453 static inline bool classof(const VPDef *D) {
454 // All VPDefs are also VPRecipeBases.
455 return true;
456 }
457
458 static inline bool classof(const VPUser *U) { return true; }
459
460 /// Returns true if the recipe may have side-effects.
461 bool mayHaveSideEffects() const;
462
463 /// Returns true for PHI-like recipes.
464 bool isPhi() const;
465
466 /// Returns true if the recipe may read from memory.
467 bool mayReadFromMemory() const;
468
469 /// Returns true if the recipe may write to memory.
470 bool mayWriteToMemory() const;
471
472 /// Returns true if the recipe may read from or write to memory.
473 bool mayReadOrWriteMemory() const {
475 }
476
477 /// Returns the debug location of the recipe.
478 DebugLoc getDebugLoc() const { return DL; }
479
480 /// Return true if the recipe is a scalar cast.
481 bool isScalarCast() const;
482
483 /// Set the recipe's debug location to \p NewDL.
484 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
485
486protected:
487 /// Compute the cost of this recipe either using a recipe's specialized
488 /// implementation or using the legacy cost model and the underlying
489 /// instructions.
490 virtual InstructionCost computeCost(ElementCount VF,
491 VPCostContext &Ctx) const;
492};
493
494// Helper macro to define common classof implementations for recipes.
495#define VP_CLASSOF_IMPL(VPDefID) \
496 static inline bool classof(const VPDef *D) { \
497 return D->getVPDefID() == VPDefID; \
498 } \
499 static inline bool classof(const VPValue *V) { \
500 auto *R = V->getDefiningRecipe(); \
501 return R && R->getVPDefID() == VPDefID; \
502 } \
503 static inline bool classof(const VPUser *U) { \
504 auto *R = dyn_cast<VPRecipeBase>(U); \
505 return R && R->getVPDefID() == VPDefID; \
506 } \
507 static inline bool classof(const VPRecipeBase *R) { \
508 return R->getVPDefID() == VPDefID; \
509 } \
510 static inline bool classof(const VPSingleDefRecipe *R) { \
511 return R->getVPDefID() == VPDefID; \
512 }
513
514/// VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or
515/// more output IR that define a single result VPValue.
516/// Note that VPRecipeBase must be inherited from before VPValue.
517class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
518public:
519 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
521 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
522
523 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
525 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
526
527 static inline bool classof(const VPRecipeBase *R) {
528 switch (R->getVPDefID()) {
529 case VPRecipeBase::VPDerivedIVSC:
530 case VPRecipeBase::VPEVLBasedIVPHISC:
531 case VPRecipeBase::VPExpandSCEVSC:
532 case VPRecipeBase::VPExpressionSC:
533 case VPRecipeBase::VPInstructionSC:
534 case VPRecipeBase::VPReductionEVLSC:
535 case VPRecipeBase::VPReductionSC:
536 case VPRecipeBase::VPReplicateSC:
537 case VPRecipeBase::VPScalarIVStepsSC:
538 case VPRecipeBase::VPVectorPointerSC:
539 case VPRecipeBase::VPVectorEndPointerSC:
540 case VPRecipeBase::VPWidenCallSC:
541 case VPRecipeBase::VPWidenCanonicalIVSC:
542 case VPRecipeBase::VPWidenCastSC:
543 case VPRecipeBase::VPWidenGEPSC:
544 case VPRecipeBase::VPWidenIntrinsicSC:
545 case VPRecipeBase::VPWidenSC:
546 case VPRecipeBase::VPWidenSelectSC:
547 case VPRecipeBase::VPBlendSC:
548 case VPRecipeBase::VPPredInstPHISC:
549 case VPRecipeBase::VPCanonicalIVPHISC:
550 case VPRecipeBase::VPActiveLaneMaskPHISC:
551 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
552 case VPRecipeBase::VPWidenPHISC:
553 case VPRecipeBase::VPWidenIntOrFpInductionSC:
554 case VPRecipeBase::VPWidenPointerInductionSC:
555 case VPRecipeBase::VPReductionPHISC:
556 case VPRecipeBase::VPPartialReductionSC:
557 return true;
558 case VPRecipeBase::VPBranchOnMaskSC:
559 case VPRecipeBase::VPInterleaveEVLSC:
560 case VPRecipeBase::VPInterleaveSC:
561 case VPRecipeBase::VPIRInstructionSC:
562 case VPRecipeBase::VPWidenLoadEVLSC:
563 case VPRecipeBase::VPWidenLoadSC:
564 case VPRecipeBase::VPWidenStoreEVLSC:
565 case VPRecipeBase::VPWidenStoreSC:
566 case VPRecipeBase::VPHistogramSC:
567 // TODO: Widened stores don't define a value, but widened loads do. Split
568 // the recipes to be able to make widened loads VPSingleDefRecipes.
569 return false;
570 }
571 llvm_unreachable("Unhandled VPDefID");
572 }
573
574 static inline bool classof(const VPUser *U) {
575 auto *R = dyn_cast<VPRecipeBase>(U);
576 return R && classof(R);
577 }
578
579 VPSingleDefRecipe *clone() override = 0;
580
581 /// Returns the underlying instruction.
588
589#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
590 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
591 LLVM_DUMP_METHOD void dump() const;
592#endif
593};
594
595/// Class to record and manage LLVM IR flags.
597 enum class OperationType : unsigned char {
598 Cmp,
599 OverflowingBinOp,
600 Trunc,
601 DisjointOp,
602 PossiblyExactOp,
603 GEPOp,
604 FPMathOp,
605 NonNegOp,
606 Other
607 };
608
609public:
610 struct WrapFlagsTy {
611 char HasNUW : 1;
612 char HasNSW : 1;
613
615 };
616
618 char HasNUW : 1;
619 char HasNSW : 1;
620
622 };
623
628
630 char NonNeg : 1;
631 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
632 };
633
634private:
635 struct ExactFlagsTy {
636 char IsExact : 1;
637 };
638 struct FastMathFlagsTy {
639 char AllowReassoc : 1;
640 char NoNaNs : 1;
641 char NoInfs : 1;
642 char NoSignedZeros : 1;
643 char AllowReciprocal : 1;
644 char AllowContract : 1;
645 char ApproxFunc : 1;
646
647 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
648 };
649
650 OperationType OpType;
651
652 union {
657 ExactFlagsTy ExactFlags;
660 FastMathFlagsTy FMFs;
661 unsigned AllFlags;
662 };
663
664public:
665 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
666
668 if (auto *Op = dyn_cast<CmpInst>(&I)) {
669 OpType = OperationType::Cmp;
670 CmpPredicate = Op->getPredicate();
671 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
672 OpType = OperationType::DisjointOp;
673 DisjointFlags.IsDisjoint = Op->isDisjoint();
674 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
675 OpType = OperationType::OverflowingBinOp;
676 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
677 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
678 OpType = OperationType::Trunc;
679 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
680 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
681 OpType = OperationType::PossiblyExactOp;
682 ExactFlags.IsExact = Op->isExact();
683 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
684 OpType = OperationType::GEPOp;
685 GEPFlags = GEP->getNoWrapFlags();
686 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
687 OpType = OperationType::NonNegOp;
688 NonNegFlags.NonNeg = PNNI->hasNonNeg();
689 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
690 OpType = OperationType::FPMathOp;
691 FMFs = Op->getFastMathFlags();
692 } else {
693 OpType = OperationType::Other;
694 AllFlags = 0;
695 }
696 }
697
699 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
700
702 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
703
705 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
706
707 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
708
710 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
711
713 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
714
716 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
717
719 OpType = Other.OpType;
720 AllFlags = Other.AllFlags;
721 }
722
723 /// Only keep flags also present in \p Other. \p Other must have the same
724 /// OpType as the current object.
725 void intersectFlags(const VPIRFlags &Other);
726
727 /// Drop all poison-generating flags.
729 // NOTE: This needs to be kept in-sync with
730 // Instruction::dropPoisonGeneratingFlags.
731 switch (OpType) {
732 case OperationType::OverflowingBinOp:
733 WrapFlags.HasNUW = false;
734 WrapFlags.HasNSW = false;
735 break;
736 case OperationType::Trunc:
737 TruncFlags.HasNUW = false;
738 TruncFlags.HasNSW = false;
739 break;
740 case OperationType::DisjointOp:
741 DisjointFlags.IsDisjoint = false;
742 break;
743 case OperationType::PossiblyExactOp:
744 ExactFlags.IsExact = false;
745 break;
746 case OperationType::GEPOp:
748 break;
749 case OperationType::FPMathOp:
750 FMFs.NoNaNs = false;
751 FMFs.NoInfs = false;
752 break;
753 case OperationType::NonNegOp:
754 NonNegFlags.NonNeg = false;
755 break;
756 case OperationType::Cmp:
757 case OperationType::Other:
758 break;
759 }
760 }
761
762 /// Apply the IR flags to \p I.
763 void applyFlags(Instruction &I) const {
764 switch (OpType) {
765 case OperationType::OverflowingBinOp:
766 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
767 I.setHasNoSignedWrap(WrapFlags.HasNSW);
768 break;
769 case OperationType::Trunc:
770 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
771 I.setHasNoSignedWrap(TruncFlags.HasNSW);
772 break;
773 case OperationType::DisjointOp:
774 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
775 break;
776 case OperationType::PossiblyExactOp:
777 I.setIsExact(ExactFlags.IsExact);
778 break;
779 case OperationType::GEPOp:
780 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
781 break;
782 case OperationType::FPMathOp:
783 I.setHasAllowReassoc(FMFs.AllowReassoc);
784 I.setHasNoNaNs(FMFs.NoNaNs);
785 I.setHasNoInfs(FMFs.NoInfs);
786 I.setHasNoSignedZeros(FMFs.NoSignedZeros);
787 I.setHasAllowReciprocal(FMFs.AllowReciprocal);
788 I.setHasAllowContract(FMFs.AllowContract);
789 I.setHasApproxFunc(FMFs.ApproxFunc);
790 break;
791 case OperationType::NonNegOp:
792 I.setNonNeg(NonNegFlags.NonNeg);
793 break;
794 case OperationType::Cmp:
795 case OperationType::Other:
796 break;
797 }
798 }
799
801 assert(OpType == OperationType::Cmp &&
802 "recipe doesn't have a compare predicate");
803 return CmpPredicate;
804 }
805
807 assert(OpType == OperationType::Cmp &&
808 "recipe doesn't have a compare predicate");
809 CmpPredicate = Pred;
810 }
811
813
814 /// Returns true if the recipe has a comparison predicate.
815 bool hasPredicate() const { return OpType == OperationType::Cmp; }
816
817 /// Returns true if the recipe has fast-math flags.
818 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
819
821
822 /// Returns true if the recipe has non-negative flag.
823 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
824
825 bool isNonNeg() const {
826 assert(OpType == OperationType::NonNegOp &&
827 "recipe doesn't have a NNEG flag");
828 return NonNegFlags.NonNeg;
829 }
830
831 bool hasNoUnsignedWrap() const {
832 switch (OpType) {
833 case OperationType::OverflowingBinOp:
834 return WrapFlags.HasNUW;
835 case OperationType::Trunc:
836 return TruncFlags.HasNUW;
837 default:
838 llvm_unreachable("recipe doesn't have a NUW flag");
839 }
840 }
841
842 bool hasNoSignedWrap() const {
843 switch (OpType) {
844 case OperationType::OverflowingBinOp:
845 return WrapFlags.HasNSW;
846 case OperationType::Trunc:
847 return TruncFlags.HasNSW;
848 default:
849 llvm_unreachable("recipe doesn't have a NSW flag");
850 }
851 }
852
853 bool isDisjoint() const {
854 assert(OpType == OperationType::DisjointOp &&
855 "recipe cannot have a disjoing flag");
856 return DisjointFlags.IsDisjoint;
857 }
858
859#if !defined(NDEBUG)
860 /// Returns true if the set flags are valid for \p Opcode.
861 bool flagsValidForOpcode(unsigned Opcode) const;
862#endif
863
864#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
865 void printFlags(raw_ostream &O) const;
866#endif
867};
868
869/// A pure-virtual common base class for recipes defining a single VPValue and
870/// using IR flags.
872 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
874 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags() {}
875
876 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
877 Instruction &I)
878 : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()), VPIRFlags(I) {}
879
880 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
881 const VPIRFlags &Flags,
883 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
884
885 static inline bool classof(const VPRecipeBase *R) {
886 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
887 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
888 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
889 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
890 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
891 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
892 R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
893 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
894 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
895 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
896 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
897 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
898 }
899
900 static inline bool classof(const VPUser *U) {
901 auto *R = dyn_cast<VPRecipeBase>(U);
902 return R && classof(R);
903 }
904
905 static inline bool classof(const VPValue *V) {
906 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
907 return R && classof(R);
908 }
909
910 VPRecipeWithIRFlags *clone() override = 0;
911
912 static inline bool classof(const VPSingleDefRecipe *U) {
913 auto *R = dyn_cast<VPRecipeBase>(U);
914 return R && classof(R);
915 }
916
917 void execute(VPTransformState &State) override = 0;
918
919 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
921 VPCostContext &Ctx) const;
922};
923
924/// Helper to access the operand that contains the unroll part for this recipe
925/// after unrolling.
926template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
927protected:
928 /// Return the VPValue operand containing the unroll part or null if there is
929 /// no such operand.
930 VPValue *getUnrollPartOperand(const VPUser &U) const;
931
932 /// Return the unroll part.
933 unsigned getUnrollPart(const VPUser &U) const;
934};
935
936/// Helper to manage IR metadata for recipes. It filters out metadata that
937/// cannot be propagated.
940
941public:
942 VPIRMetadata() = default;
943
944 /// Adds metadata that can be preserved from the original instruction
945 /// \p I.
947
948 /// Adds metadata that can be preserved from the original instruction
949 /// \p I and noalias metadata guaranteed by runtime checks using \p LVer.
951
952 /// Copy constructor for cloning.
953 VPIRMetadata(const VPIRMetadata &Other) = default;
954
956
957 /// Add all metadata to \p I.
958 void applyMetadata(Instruction &I) const;
959
960 /// Add metadata with kind \p Kind and \p Node.
961 void addMetadata(unsigned Kind, MDNode *Node) {
962 Metadata.emplace_back(Kind, Node);
963 }
964
965 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
966 /// nodes that are common to both.
967 void intersect(const VPIRMetadata &MD);
968};
969
970/// This is a concrete Recipe that models a single VPlan-level instruction.
971/// While as any Recipe it may generate a sequence of IR instructions when
972/// executed, these instructions would always form a single-def expression as
973/// the VPInstruction is also a single def-use vertex.
975 public VPIRMetadata,
976 public VPUnrollPartAccessor<1> {
977 friend class VPlanSlp;
978
979public:
980 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
981 enum {
983 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
984 // values of a first-order recurrence.
988 // Creates a mask where each lane is active (true) whilst the current
989 // counter (first operand + index) is less than the second operand. i.e.
990 // mask[i] = icmp ult (op0 + i), op1
991 // The size of the mask returned is VF * Multiplier (UF, third op).
995 // Increment the canonical IV separately for each unrolled part.
1000 /// Given operands of (the same) struct type, creates a struct of fixed-
1001 /// width vectors each containing a struct field of all operands. The
1002 /// number of operands matches the element count of every vector.
1004 /// Creates a fixed-width vector containing all operands. The number of
1005 /// operands matches the vector element count.
1007 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1008 /// abstract VPInstruction whose single defined VPValue represents VF
1009 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1010 /// VPInstructions.
1012 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1013 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1017 // Extracts the last lane from its operand if it is a vector, or the last
1018 // part if scalar. In the latter case, the recipe will be removed during
1019 // unrolling.
1021 // Extracts the last lane for each part from its operand.
1023 // Extracts the second-to-last lane from its operand or the second-to-last
1024 // part if it is scalar. In the latter case, the recipe will be removed
1025 // during unrolling.
1027 LogicalAnd, // Non-poison propagating logical And.
1028 // Add an offset in bytes (second operand) to a base pointer (first
1029 // operand). Only generates scalar values (either for the first lane only or
1030 // for all lanes, depending on its uses).
1032 // Add a vector offset in bytes (second operand) to a scalar base pointer
1033 // (first operand).
1035 // Returns a scalar boolean value, which is true if any lane of its
1036 // (boolean) vector operands is true. It produces the reduced value across
1037 // all unrolled iterations. Unrolling will add all copies of its original
1038 // operand as additional operands. AnyOf is poison-safe as all operands
1039 // will be frozen.
1041 // Calculates the first active lane index of the vector predicate operands.
1042 // It produces the lane index across all unrolled iterations. Unrolling will
1043 // add all copies of its original operand as additional operands.
1045
1046 // The opcodes below are used for VPInstructionWithType.
1047 //
1048 /// Scale the first operand (vector step) by the second operand
1049 /// (scalar-step). Casts both operands to the result type if needed.
1051 /// Start vector for reductions with 3 operands: the original start value,
1052 /// the identity value for the reduction and an integer indicating the
1053 /// scaling factor.
1055 // Creates a step vector starting from 0 to VF with a step of 1.
1057 /// Extracts a single lane (first operand) from a set of vector operands.
1058 /// The lane specifies an index into a vector formed by combining all vector
1059 /// operands (all operands after the first one).
1061 /// Explicit user for the resume phi of the canonical induction in the main
1062 /// VPlan, used by the epilogue vector loop.
1064 /// Returns the value for vscale.
1067 };
1068
1069 /// Returns true if this VPInstruction generates scalar values for all lanes.
1070 /// Most VPInstructions generate a single value per part, either vector or
1071 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1072 /// values per all lanes, stemming from an original ingredient. This method
1073 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1074 /// underlying ingredient.
1075 bool doesGeneratePerAllLanes() const;
1076
1077private:
1078 typedef unsigned char OpcodeTy;
1079 OpcodeTy Opcode;
1080
1081 /// An optional name that can be used for the generated IR instruction.
1082 const std::string Name;
1083
1084 /// Returns true if we can generate a scalar for the first lane only if
1085 /// needed.
1086 bool canGenerateScalarForFirstLane() const;
1087
1088 /// Utility methods serving execute(): generates a single vector instance of
1089 /// the modeled instruction. \returns the generated value. In some cases an
1090 /// existing value is returned rather than a generated one.
1091 Value *generate(VPTransformState &State);
1092
1093#if !defined(NDEBUG)
1094 /// Return the number of operands determined by the opcode of the
1095 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1096 /// directly by the opcode.
1097 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1098#endif
1099
1100public:
1101 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1102 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
1103 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1104 VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {}
1105
1106 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1107 const VPIRFlags &Flags, const VPIRMetadata &MD = {},
1108 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1109
1110 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1111
/// Create a heap-allocated copy of this VPInstruction with the same opcode,
/// operands, IR flags, IR metadata, debug location and name. The underlying
/// value is carried over only when this recipe has one.
1112 VPInstruction *clone() override {
// *this is passed twice: it binds to both the VPIRFlags and the
// VPIRMetadata parameters of the constructor.
1113 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1114 getDebugLoc(), Name);
1115 if (getUnderlyingValue())
1116 New->setUnderlyingValue(getUnderlyingInstr());
1117 return New;
1118 }
1119
1120 unsigned getOpcode() const { return Opcode; }
1121
1122 /// Generate the instruction.
1123 /// TODO: We currently execute only per-part unless a specific instance is
1124 /// provided.
1125 void execute(VPTransformState &State) override;
1126
1127 /// Return the cost of this VPInstruction.
1128 InstructionCost computeCost(ElementCount VF,
1129 VPCostContext &Ctx) const override;
1130
1131#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1132 /// Print the VPInstruction to \p O.
1133 void print(raw_ostream &O, const Twine &Indent,
1134 VPSlotTracker &SlotTracker) const override;
1135
1136 /// Print the VPInstruction to dbgs() (for debugging).
1137 LLVM_DUMP_METHOD void dump() const;
1138#endif
1139
/// Returns true if this VPInstruction defines a result value. The opcodes
/// listed in the switch return false; every other opcode returns true.
1140 bool hasResult() const {
1141 // CallInst may or may not have a result, depending on the called function.
1142 // Conservatively report that calls have results for now.
1143 switch (getOpcode()) {
1144 case Instruction::Ret:
1145 case Instruction::Br:
1146 case Instruction::Store:
1147 case Instruction::Switch:
1148 case Instruction::IndirectBr:
1149 case Instruction::Resume:
1150 case Instruction::CatchRet:
1151 case Instruction::Unreachable:
1152 case Instruction::Fence:
1153 case Instruction::AtomicRMW:
1156 return false;
1157 default:
1158 return true;
1159 }
1160 }
1161
1162 /// Returns true if the underlying opcode may read from or write to memory.
1163 bool opcodeMayReadOrWriteFromMemory() const;
1164
1165 /// Returns true if the recipe only uses the first lane of operand \p Op.
1166 bool usesFirstLaneOnly(const VPValue *Op) const override;
1167
1168 /// Returns true if the recipe only uses the first part of operand \p Op.
1169 bool usesFirstPartOnly(const VPValue *Op) const override;
1170
1171 /// Returns true if this VPInstruction produces a scalar value from a vector,
1172 /// e.g. by performing a reduction or extracting a lane.
1173 bool isVectorToScalar() const;
1174
1175 /// Returns true if this VPInstruction's operands are single scalars and the
1176 /// result is also a single scalar.
1177 bool isSingleScalar() const;
1178
1179 /// Returns the symbolic name assigned to the VPInstruction.
1180 StringRef getName() const { return Name; }
1181};
1182
1183/// A specialization of VPInstruction augmenting it with a dedicated result
1184/// type, to be used when the opcode and operands of the VPInstruction don't
1185/// directly determine the result type. Note that there is no separate VPDef ID
1186/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1187/// distinguished purely by the opcode.
1189 /// Scalar result type produced by the recipe.
1190 Type *ResultTy;
1191
1192public:
1194 Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL,
1195 const Twine &Name = "")
1196 : VPInstruction(Opcode, Operands, Flags, {}, DL, Name),
1197 ResultTy(ResultTy) {}
1198
1200 Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags,
1201 const VPIRMetadata &Metadata, const Twine &Name = "")
1202 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1203 ResultTy(ResultTy) {}
1204
1205 static inline bool classof(const VPRecipeBase *R) {
1206 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1207 // type information.
1208 if (R->isScalarCast())
1209 return true;
1210 auto *VPI = dyn_cast<VPInstruction>(R);
1211 if (!VPI)
1212 return false;
1213 switch (VPI->getOpcode()) {
1217 return true;
1218 default:
1219 return false;
1220 }
1221 }
1222
1223 static inline bool classof(const VPUser *R) {
1225 }
1226
1227 VPInstruction *clone() override {
1228 auto *New =
1230 *this, getDebugLoc(), getName());
1231 New->setUnderlyingValue(getUnderlyingValue());
1232 return New;
1233 }
1234
1235 void execute(VPTransformState &State) override;
1236
1237 /// Return the cost of this VPInstruction.
1239 VPCostContext &Ctx) const override {
1240 // TODO: Compute accurate cost after retiring the legacy cost model.
1241 return 0;
1242 }
1243
1244 Type *getResultType() const { return ResultTy; }
1245
1246#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1247 /// Print the recipe.
1248 void print(raw_ostream &O, const Twine &Indent,
1249 VPSlotTracker &SlotTracker) const override;
1250#endif
1251};
1252
1253/// Helper type to provide functions to access incoming values and blocks for
1254/// phi-like recipes.
1256protected:
1257 /// Return a VPRecipeBase* to the current object.
1258 virtual const VPRecipeBase *getAsRecipe() const = 0;
1259
1260public:
1261 virtual ~VPPhiAccessors() = default;
1262
1263 /// Returns the incoming VPValue with index \p Idx.
1264 VPValue *getIncomingValue(unsigned Idx) const {
1265 return getAsRecipe()->getOperand(Idx);
1266 }
1267
1268 /// Returns the incoming block with index \p Idx.
1269 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1270
1271 /// Returns the number of incoming values, also number of incoming blocks.
1272 virtual unsigned getNumIncoming() const {
1273 return getAsRecipe()->getNumOperands();
1274 }
1275
1276 /// Returns an iterator range over the incoming values.
1278 return make_range(getAsRecipe()->op_begin(),
1279 getAsRecipe()->op_begin() + getNumIncoming());
1280 }
1281
1283 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1284
1285 /// Returns an iterator range over the incoming blocks.
1287 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1288 return getIncomingBlock(Idx);
1289 };
1290 return map_range(index_range(0, getNumIncoming()), GetBlock);
1291 }
1292
1293 /// Returns an iterator range over pairs of incoming values and corresponding
1294 /// incoming blocks.
1300
1301 /// Removes the incoming value for \p IncomingBlock, which must be a
1302 /// predecessor.
1303 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1304
1305#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1306 /// Print the recipe.
1308#endif
1309};
1310
1312 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1313 : VPInstruction(Instruction::PHI, Operands, DL, Name) {}
1314
/// Methods to support type inquiry through isa, cast, and dyn_cast. A VPPhi
/// is a VPInstruction whose opcode is Instruction::PHI; each overload first
/// casts its argument to VPInstruction and then checks the opcode.
1315 static inline bool classof(const VPUser *U) {
1316 auto *VPI = dyn_cast<VPInstruction>(U);
1317 return VPI && VPI->getOpcode() == Instruction::PHI;
1318 }
1319
/// Same check, starting from a VPValue.
1320 static inline bool classof(const VPValue *V) {
1321 auto *VPI = dyn_cast<VPInstruction>(V);
1322 return VPI && VPI->getOpcode() == Instruction::PHI;
1323 }
1324
/// Same check, starting from a VPSingleDefRecipe.
1325 static inline bool classof(const VPSingleDefRecipe *SDR) {
1326 auto *VPI = dyn_cast<VPInstruction>(SDR);
1327 return VPI && VPI->getOpcode() == Instruction::PHI;
1328 }
1329
/// Create a new heap-allocated VPPhi with the same operands, debug location
/// and name as this one, and copy this recipe's underlying value to it.
1330 VPPhi *clone() override {
1331 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1332 PhiR->setUnderlyingValue(getUnderlyingValue());
1333 return PhiR;
1334 }
1335
1336 void execute(VPTransformState &State) override;
1337
1338#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1339 /// Print the recipe.
1340 void print(raw_ostream &O, const Twine &Indent,
1341 VPSlotTracker &SlotTracker) const override;
1342#endif
1343
1344protected:
1345 const VPRecipeBase *getAsRecipe() const override { return this; }
1346};
1347
1348 /// A recipe to wrap an original IR instruction not to be modified during
1349 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1350 /// Except for PHIs, VPIRInstructions cannot have any operands.
1352 Instruction &I;
1353
1354protected:
1355 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1356 /// subclasses may need to be created, e.g. VPIRPhi.
1358 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1359
1360public:
1361 ~VPIRInstruction() override = default;
1362
1363 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1364 /// VPIRInstruction.
1366
1367 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1368
1370 auto *R = create(I);
1371 for (auto *Op : operands())
1372 R->addOperand(Op);
1373 return R;
1374 }
1375
1376 void execute(VPTransformState &State) override;
1377
1378 /// Return the cost of this VPIRInstruction.
1380 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1381
1382 Instruction &getInstruction() const { return I; }
1383
1384#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1385 /// Print the recipe.
1386 void print(raw_ostream &O, const Twine &Indent,
1387 VPSlotTracker &SlotTracker) const override;
1388#endif
1389
1390 bool usesScalars(const VPValue *Op) const override {
1392 "Op must be an operand of the recipe");
1393 return true;
1394 }
1395
1396 bool usesFirstPartOnly(const VPValue *Op) const override {
1398 "Op must be an operand of the recipe");
1399 return true;
1400 }
1401
1402 bool usesFirstLaneOnly(const VPValue *Op) const override {
1404 "Op must be an operand of the recipe");
1405 return true;
1406 }
1407
1408 /// Update the recipe's first operand to the last lane of the operand using \p
1409 /// Builder. Must only be used for VPIRInstructions with at least one operand
1410 /// wrapping a PHINode.
1412};
1413
1414 /// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1415 /// of cast/dyn_cast/isa and execute() implementation. A single VPValue operand
1416 /// is allowed, and it is used to add a new incoming value for the single
1417 /// predecessor VPBB.
1419 public VPPhiAccessors {
1421
/// Method to support type inquiry through isa, cast, and dyn_cast: \p U is a
/// VPIRPhi if it is a VPIRInstruction whose wrapped instruction is a PHINode.
1422 static inline bool classof(const VPRecipeBase *U) {
1423 auto *R = dyn_cast<VPIRInstruction>(U);
1424 return R && isa<PHINode>(R->getInstruction());
1425 }
1426
1428
1429 void execute(VPTransformState &State) override;
1430
1431#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1432 /// Print the recipe.
1433 void print(raw_ostream &O, const Twine &Indent,
1434 VPSlotTracker &SlotTracker) const override;
1435#endif
1436
1437protected:
1438 const VPRecipeBase *getAsRecipe() const override { return this; }
1439};
1440
1441/// VPWidenRecipe is a recipe for producing a widened instruction using the
1442/// opcode and operands of the recipe. This recipe covers most of the
1443/// traditional vectorization cases where each recipe transforms into a
1444/// vectorized version of itself.
1446 public VPIRMetadata {
1447 unsigned Opcode;
1448
1449public:
1450 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1451 const VPIRFlags &Flags, const VPIRMetadata &Metadata,
1452 DebugLoc DL)
1453 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1454 VPIRMetadata(Metadata), Opcode(Opcode) {}
1455
1457 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPIRMetadata(I),
1458 Opcode(I.getOpcode()) {}
1459
1460 ~VPWidenRecipe() override = default;
1461
/// Create a new heap-allocated VPWidenRecipe with the same opcode, operands,
/// IR flags, IR metadata and debug location, and copy this recipe's
/// underlying value to it.
1462 VPWidenRecipe *clone() override {
// *this is passed twice: it binds to both the VPIRFlags and the
// VPIRMetadata parameters of the constructor.
1463 auto *R =
1464 new VPWidenRecipe(getOpcode(), operands(), *this, *this, getDebugLoc());
1465 R->setUnderlyingValue(getUnderlyingValue());
1466 return R;
1467 }
1468
1469 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1470
1471 /// Produce a widened instruction using the opcode and operands of the recipe,
1472 /// processing State.VF elements.
1473 void execute(VPTransformState &State) override;
1474
1475 /// Return the cost of this VPWidenRecipe.
1476 InstructionCost computeCost(ElementCount VF,
1477 VPCostContext &Ctx) const override;
1478
1479 unsigned getOpcode() const { return Opcode; }
1480
1481#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1482 /// Print the recipe.
1483 void print(raw_ostream &O, const Twine &Indent,
1484 VPSlotTracker &SlotTracker) const override;
1485#endif
1486};
1487
1488/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1490 /// Cast instruction opcode.
1491 Instruction::CastOps Opcode;
1492
1493 /// Result type for the cast.
1494 Type *ResultTy;
1495
1496public:
1498 CastInst &UI)
1499 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPIRMetadata(UI),
1500 Opcode(Opcode), ResultTy(ResultTy) {
1501 assert(UI.getOpcode() == Opcode &&
1502 "opcode of underlying cast doesn't match");
1503 }
1504
1506 const VPIRFlags &Flags = {},
1507 const VPIRMetadata &Metadata = {},
1509 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1510 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1511 assert(flagsValidForOpcode(Opcode) &&
1512 "Set flags not supported for the provided opcode");
1513 }
1514
1515 ~VPWidenCastRecipe() override = default;
1516
1518 auto *New = new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, *this,
1519 *this, getDebugLoc());
1520 if (auto *UV = getUnderlyingValue())
1521 New->setUnderlyingValue(UV);
1522 return New;
1523 }
1524
1525 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1526
1527 /// Produce widened copies of the cast.
1528 void execute(VPTransformState &State) override;
1529
1530 /// Return the cost of this VPWidenCastRecipe.
1532 VPCostContext &Ctx) const override;
1533
1534#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1535 /// Print the recipe.
1536 void print(raw_ostream &O, const Twine &Indent,
1537 VPSlotTracker &SlotTracker) const override;
1538#endif
1539
1540 Instruction::CastOps getOpcode() const { return Opcode; }
1541
1542 /// Returns the result type of the cast.
1543 Type *getResultType() const { return ResultTy; }
1544};
1545
1546/// A recipe for widening vector intrinsics.
1548 /// ID of the vector intrinsic to widen.
1549 Intrinsic::ID VectorIntrinsicID;
1550
1551 /// Scalar return type of the intrinsic.
1552 Type *ResultTy;
1553
1554 /// True if the intrinsic may read from memory.
1555 bool MayReadFromMemory;
1556
1557 /// True if the intrinsic may write to memory.
1558 bool MayWriteToMemory;
1559
1560 /// True if the intrinsic may have side-effects.
1561 bool MayHaveSideEffects;
1562
1563public:
1565 ArrayRef<VPValue *> CallArguments, Type *Ty,
1567 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
1568 VPIRMetadata(CI), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1569 MayReadFromMemory(CI.mayReadFromMemory()),
1570 MayWriteToMemory(CI.mayWriteToMemory()),
1571 MayHaveSideEffects(CI.mayHaveSideEffects()) {}
1572
1574 ArrayRef<VPValue *> CallArguments, Type *Ty,
1576 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
1577 VPIRMetadata(), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
1578 LLVMContext &Ctx = Ty->getContext();
1579 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1580 MemoryEffects ME = Attrs.getMemoryEffects();
1581 MayReadFromMemory = !ME.onlyWritesMemory();
1582 MayWriteToMemory = !ME.onlyReadsMemory();
1583 MayHaveSideEffects = MayWriteToMemory ||
1584 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1585 !Attrs.hasAttribute(Attribute::WillReturn);
1586 }
1587
1588 ~VPWidenIntrinsicRecipe() override = default;
1589
1591 if (Value *CI = getUnderlyingValue())
1592 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1593 operands(), ResultTy, getDebugLoc());
1594 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1595 getDebugLoc());
1596 }
1597
1598 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1599
1600 /// Produce a widened version of the vector intrinsic.
1601 void execute(VPTransformState &State) override;
1602
1603 /// Return the cost of this vector intrinsic.
1605 VPCostContext &Ctx) const override;
1606
1607 /// Return the ID of the intrinsic.
1608 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1609
1610 /// Return the scalar return type of the intrinsic.
1611 Type *getResultType() const { return ResultTy; }
1612
1613 /// Return the name of the intrinsic as a string.
1615
1616 /// Returns true if the intrinsic may read from memory.
1617 bool mayReadFromMemory() const { return MayReadFromMemory; }
1618
1619 /// Returns true if the intrinsic may write to memory.
1620 bool mayWriteToMemory() const { return MayWriteToMemory; }
1621
1622 /// Returns true if the intrinsic may have side-effects.
1623 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1624
1625#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1626 /// Print the recipe.
1627 void print(raw_ostream &O, const Twine &Indent,
1628 VPSlotTracker &SlotTracker) const override;
1629#endif
1630
1631 bool usesFirstLaneOnly(const VPValue *Op) const override;
1632};
1633
1634/// A recipe for widening Call instructions using library calls.
1636 public VPIRMetadata {
1637 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1638 /// between a given VF and the chosen vectorized variant, so there will be a
1639 /// different VPlan for each VF with a valid variant.
1640 Function *Variant;
1641
1642public:
1644 ArrayRef<VPValue *> CallArguments,
1646 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
1647 *cast<Instruction>(UV)),
1648 VPIRMetadata(*cast<Instruction>(UV)), Variant(Variant) {
1649 assert(
1651 "last operand must be the called function");
1652 }
1653
1654 ~VPWidenCallRecipe() override = default;
1655
1657 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1658 getDebugLoc());
1659 }
1660
1661 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1662
1663 /// Produce a widened version of the call instruction.
1664 void execute(VPTransformState &State) override;
1665
1666 /// Return the cost of this VPWidenCallRecipe.
1667 InstructionCost computeCost(ElementCount VF,
1668 VPCostContext &Ctx) const override;
1669
1673
1676
1677#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1678 /// Print the recipe.
1679 void print(raw_ostream &O, const Twine &Indent,
1680 VPSlotTracker &SlotTracker) const override;
1681#endif
1682};
1683
1684/// A recipe representing a sequence of load -> update -> store as part of
1685/// a histogram operation. This means there may be aliasing between vector
1686/// lanes, which is handled by the llvm.experimental.vector.histogram family
1687/// of intrinsics. The only update operations currently supported are
1688/// 'add' and 'sub' where the other term is loop-invariant.
1690 /// Opcode of the update operation, currently either add or sub.
1691 unsigned Opcode;
1692
1693public:
1694 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1696 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1697
1698 ~VPHistogramRecipe() override = default;
1699
1701 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1702 }
1703
1704 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1705
1706 /// Produce a vectorized histogram operation.
1707 void execute(VPTransformState &State) override;
1708
1709 /// Return the cost of this VPHistogramRecipe.
1711 VPCostContext &Ctx) const override;
1712
1713 unsigned getOpcode() const { return Opcode; }
1714
1715 /// Return the mask operand if one was provided, or a null pointer if all
1716 /// lanes should be executed unconditionally.
1717 VPValue *getMask() const {
1718 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1719 }
1720
1721#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1722 /// Print the recipe
1723 void print(raw_ostream &O, const Twine &Indent,
1724 VPSlotTracker &SlotTracker) const override;
1725#endif
1726};
1727
1728/// A recipe for widening select instructions. Supports both wide vector and
1729/// single-scalar conditions, matching the behavior of LLVM IR's select
1730/// instruction.
1732 public VPIRMetadata {
1734 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I),
1735 VPIRMetadata(I) {}
1736
1737 ~VPWidenSelectRecipe() override = default;
1738
1743
1744 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1745
1746 /// Produce a widened version of the select instruction.
1747 void execute(VPTransformState &State) override;
1748
1749 /// Return the cost of this VPWidenSelectRecipe.
1750 InstructionCost computeCost(ElementCount VF,
1751 VPCostContext &Ctx) const override;
1752
1753#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1754 /// Print the recipe.
1755 void print(raw_ostream &O, const Twine &Indent,
1756 VPSlotTracker &SlotTracker) const override;
1757#endif
1758
1759 unsigned getOpcode() const { return Instruction::Select; }
1760
/// Returns operand 0 of the recipe, the condition of the select.
1761 VPValue *getCond() const {
1762 return getOperand(0);
1763 }
1764
1765 /// Returns true if the recipe only uses the first lane of operand \p Op.
1766 bool usesFirstLaneOnly(const VPValue *Op) const override {
1768 "Op must be an operand of the recipe");
1769 return Op == getCond() && Op->isDefinedOutsideLoopRegions();
1770 }
1771};
1772
1773/// A recipe for handling GEP instructions.
1775 Type *SourceElementTy;
1776
/// Returns true if operand 0 (the pointer operand, going by this helper's
/// name) is defined outside of loop regions.
1777 bool isPointerLoopInvariant() const {
1778 return getOperand(0)->isDefinedOutsideLoopRegions();
1779 }
1780
/// Returns true if the index with number \p I is defined outside of loop
/// regions. Indices are counted from the operand after operand 0, so index
/// \p I is operand \p I + 1.
1781 bool isIndexLoopInvariant(unsigned I) const {
1782 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1783 }
1784
/// Returns true if every operand of the recipe is defined outside of loop
/// regions.
1785 bool areAllOperandsInvariant() const {
1786 return all_of(operands(), [](VPValue *Op) {
1787 return Op->isDefinedOutsideLoopRegions();
1788 });
1789 }
1790
1791public:
1793 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP),
1794 SourceElementTy(GEP->getSourceElementType()) {
1796 (void)Metadata;
1798 assert(Metadata.empty() && "unexpected metadata on GEP");
1799 }
1800
1801 ~VPWidenGEPRecipe() override = default;
1802
1807
1808 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1809
1810 /// This recipe generates a GEP instruction.
1811 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1812
1813 /// Generate the gep nodes.
1814 void execute(VPTransformState &State) override;
1815
1816 Type *getSourceElementType() const { return SourceElementTy; }
1817
1818 /// Return the cost of this VPWidenGEPRecipe.
1820 VPCostContext &Ctx) const override {
1821 // TODO: Compute accurate cost after retiring the legacy cost model.
1822 return 0;
1823 }
1824
1825#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1826 /// Print the recipe.
1827 void print(raw_ostream &O, const Twine &Indent,
1828 VPSlotTracker &SlotTracker) const override;
1829#endif
1830
1831 /// Returns true if the recipe only uses the first lane of operand \p Op.
1832 bool usesFirstLaneOnly(const VPValue *Op) const override {
1834 "Op must be an operand of the recipe");
1835 if (Op == getOperand(0))
1836 return isPointerLoopInvariant();
1837 else
1838 return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions();
1839 }
1840};
1841
1842/// A recipe to compute a pointer to the last element of each part of a widened
1843/// memory access for widened memory accesses of IndexedTy. Used for
1844/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1846 public VPUnrollPartAccessor<2> {
1847 Type *IndexedTy;
1848
1849 /// The constant stride of the pointer computed by this recipe, expressed in
1850 /// units of IndexedTy.
1851 int64_t Stride;
1852
1853public:
1855 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1856 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1857 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1858 IndexedTy(IndexedTy), Stride(Stride) {
1859 assert(Stride < 0 && "Stride must be negative");
1860 }
1861
1862 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1863
1865 const VPValue *getVFValue() const { return getOperand(1); }
1866
1867 void execute(VPTransformState &State) override;
1868
1869 bool usesFirstLaneOnly(const VPValue *Op) const override {
1871 "Op must be an operand of the recipe");
1872 return true;
1873 }
1874
1875 /// Return the cost of this VPVectorEndPointerRecipe.
1877 VPCostContext &Ctx) const override {
1878 // TODO: Compute accurate cost after retiring the legacy cost model.
1879 return 0;
1880 }
1881
1882 /// Returns true if the recipe only uses the first part of operand \p Op.
1883 bool usesFirstPartOnly(const VPValue *Op) const override {
1885 "Op must be an operand of the recipe");
1886 assert(getNumOperands() <= 2 && "must have at most two operands");
1887 return true;
1888 }
1889
1891 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1892 Stride, getGEPNoWrapFlags(),
1893 getDebugLoc());
1894 }
1895
1896#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1897 /// Print the recipe.
1898 void print(raw_ostream &O, const Twine &Indent,
1899 VPSlotTracker &SlotTracker) const override;
1900#endif
1901};
1902
1903 /// A recipe to compute pointers for widened memory accesses of SourceElementTy.
1905 public VPUnrollPartAccessor<1> {
1906 Type *SourceElementTy;
1907
1908public:
1911 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1912 GEPFlags, DL),
1913 SourceElementTy(SourceElementTy) {}
1914
1915 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1916
1917 void execute(VPTransformState &State) override;
1918
1919 Type *getSourceElementType() const { return SourceElementTy; }
1920
1921 bool usesFirstLaneOnly(const VPValue *Op) const override {
1923 "Op must be an operand of the recipe");
1924 return true;
1925 }
1926
1927 /// Returns true if the recipe only uses the first part of operand \p Op.
1928 bool usesFirstPartOnly(const VPValue *Op) const override {
1930 "Op must be an operand of the recipe");
1931 assert(getNumOperands() <= 2 && "must have at most two operands");
1932 return true;
1933 }
1934
1936 return new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
1938 }
1939
1940 /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
1941 /// this is only accurate after the VPlan has been unrolled.
1942 bool isFirstPart() const { return getUnrollPart(*this) == 0; }
1943
1944 /// Return the cost of this VPVectorPointerRecipe.
1946 VPCostContext &Ctx) const override {
1947 // TODO: Compute accurate cost after retiring the legacy cost model.
1948 return 0;
1949 }
1950
1951#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1952 /// Print the recipe.
1953 void print(raw_ostream &O, const Twine &Indent,
1954 VPSlotTracker &SlotTracker) const override;
1955#endif
1956};
1957
1958/// A pure virtual base class for all recipes modeling header phis, including
1959/// phis for first order recurrences, pointer inductions and reductions. The
1960/// start value is the first operand of the recipe and the incoming value from
1961/// the backedge is the second operand.
1962///
1963/// Inductions are modeled using the following sub-classes:
1964/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1965/// starting at a specified value (zero for the main vector loop, the resume
1966/// value for the epilogue vector loop) and stepping by 1. The induction
1967/// controls exiting of the vector loop by comparing against the vector trip
1968/// count. Produces a single scalar PHI for the induction value per
1969/// iteration.
1970/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1971/// floating point inductions with arbitrary start and step values. Produces
1972/// a vector PHI per-part.
1973/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1974/// value of an IV with different start and step values. Produces a single
1975 /// scalar value per iteration.
1976/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1977/// canonical or derived induction.
1978/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1979/// pointer induction. Produces either a vector PHI per-part or scalar values
1980/// per-lane based on the canonical induction.
1982 public VPPhiAccessors {
1983protected:
/// Construct a header phi recipe with recipe ID \p VPDefID for the underlying
/// instruction \p UnderlyingInstr and debug location \p DL. \p Start is
/// installed as the only initial operand (operand 0).
1984 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
1985 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
1986 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
1987 UnderlyingInstr, DL) {}
1988
1989 const VPRecipeBase *getAsRecipe() const override { return this; }
1990
1991public:
1992 ~VPHeaderPHIRecipe() override = default;
1993
1994 /// Method to support type inquiry through isa, cast, and dyn_cast.
1995 static inline bool classof(const VPRecipeBase *B) {
1996 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1997 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1998 }
/// Type inquiry for VPValues: \p V is a header phi if it has a defining
/// recipe whose ID lies in the [VPFirstHeaderPHISC, VPLastHeaderPHISC] range.
/// Values without a defining recipe are never header phis.
1999 static inline bool classof(const VPValue *V) {
2000 auto *B = V->getDefiningRecipe();
2001 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2002 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
2003 }
2004
2005 /// Generate the phi nodes.
2006 void execute(VPTransformState &State) override = 0;
2007
2008 /// Return the cost of this header phi recipe.
2010 VPCostContext &Ctx) const override;
2011
2012#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2013 /// Print the recipe.
2014 void print(raw_ostream &O, const Twine &Indent,
2015 VPSlotTracker &SlotTracker) const override = 0;
2016#endif
2017
2018 /// Returns the start value of the phi, if one is set.
2020 return getNumOperands() == 0 ? nullptr : getOperand(0);
2021 }
2023 return getNumOperands() == 0 ? nullptr : getOperand(0);
2024 }
2025
2026 /// Update the start value of the recipe.
2028
2029 /// Returns the incoming value from the loop backedge.
2031 return getOperand(1);
2032 }
2033
2034 /// Update the incoming value from the loop backedge.
2036
2037 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2038 /// to be a recipe.
2040 return *getBackedgeValue()->getDefiningRecipe();
2041 }
2042};
2043
2044/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2045/// VPWidenPointerInductionRecipe), providing shared functionality, including
2046/// retrieving the step value, induction descriptor and original phi node.
2048 const InductionDescriptor &IndDesc;
2049
2050public:
/// Create a widened induction recipe with recipe ID \p Kind for the phi
/// \p IV. \p Start is forwarded to the VPHeaderPHIRecipe constructor and
/// \p Step is appended right after it, which is why getStepValue() reads
/// operand 1. \p IndDesc is stored by reference, not copied.
2051 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2052 VPValue *Step, const InductionDescriptor &IndDesc,
2053 DebugLoc DL)
2054 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2055 addOperand(Step);
2056 }
2057
2058 static inline bool classof(const VPRecipeBase *R) {
2059 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2060 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2061 }
2062
2063 static inline bool classof(const VPValue *V) {
2064 auto *R = V->getDefiningRecipe();
2065 return R && classof(R);
2066 }
2067
2068 static inline bool classof(const VPHeaderPHIRecipe *R) {
2069 return classof(static_cast<const VPRecipeBase *>(R));
2070 }
2071
2072 void execute(VPTransformState &State) override = 0;
2073
2074 /// Returns the step value of the induction.
2076 const VPValue *getStepValue() const { return getOperand(1); }
2077
2078 /// Update the step value of the recipe.
2079 void setStepValue(VPValue *V) { setOperand(1, V); }
2080
2082 const VPValue *getVFValue() const { return getOperand(2); }
2083
2084 /// Returns the number of incoming values, also number of incoming blocks.
2085 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2086 /// incoming value, its start value.
2087 unsigned getNumIncoming() const override { return 1; }
2088
2090
2091 /// Returns the induction descriptor for the recipe.
2092 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2093
2095 // TODO: All operands of base recipe must exist and be at same index in
2096 // derived recipe.
2098 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2099 }
2100
2102 // TODO: All operands of base recipe must exist and be at same index in
2103 // derived recipe.
2105 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2106 }
2107
2108 /// Returns true if the recipe only uses the first lane of operand \p Op.
2109 bool usesFirstLaneOnly(const VPValue *Op) const override {
2111 "Op must be an operand of the recipe");
2112 // The recipe creates its own wide start value, so it only requests the
2113 // first lane of the operand.
2114 // TODO: Remove once creating the start value is modeled separately.
2115 return Op == getStartValue() || Op == getStepValue();
2116 }
2117};
2118
2119/// A recipe for handling phi nodes of integer and floating-point inductions,
2120/// producing their vector values. This is an abstract recipe and must be
2121/// converted to concrete recipes before executing.
2123 TruncInst *Trunc;
2124
2125 // If this recipe is unrolled it will have 2 additional operands.
2126 bool isUnrolled() const { return getNumOperands() == 5; }
2127
2128public:
2130 VPValue *VF, const InductionDescriptor &IndDesc,
2131 DebugLoc DL)
2132 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2133 Step, IndDesc, DL),
2134 Trunc(nullptr) {
2135 addOperand(VF);
2136 }
2137
2139 VPValue *VF, const InductionDescriptor &IndDesc,
2140 TruncInst *Trunc, DebugLoc DL)
2141 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2142 Step, IndDesc, DL),
2143 Trunc(Trunc) {
2144 addOperand(VF);
2146 (void)Metadata;
2147 if (Trunc)
2149 assert(Metadata.empty() && "unexpected metadata on Trunc");
2150 }
2151
2153
2159
2160 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2161
/// This abstract recipe must never be executed directly; reaching this
/// function is a bug (see the llvm_unreachable message).
2162 void execute(VPTransformState &State) override {
2163 llvm_unreachable("cannot execute this recipe, should be expanded via "
2164 "expandVPWidenIntOrFpInductionRecipe");
2165 }
2166
2167#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2168 /// Print the recipe.
2169 void print(raw_ostream &O, const Twine &Indent,
2170 VPSlotTracker &SlotTracker) const override;
2171#endif
2172
2174 // If the recipe has been unrolled return the VPValue for the induction
2175 // increment.
2176 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2177 }
2178
2179 /// Returns the number of incoming values, also number of incoming blocks.
2180 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2181 /// incoming value, its start value.
2182 unsigned getNumIncoming() const override { return 1; }
2183
2184 /// Returns the TruncInst associated with this recipe, or nullptr if the
2185 /// recipe was created without one.
2186 TruncInst *getTruncInst() { return Trunc; }
2187 const TruncInst *getTruncInst() const { return Trunc; }
2188
2189 /// Returns true if the induction is canonical, i.e. starting at 0 and
2190 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2191 /// same type as the canonical induction.
2192 bool isCanonical() const;
2193
2194 /// Returns the scalar type of the induction.
2196 return Trunc ? Trunc->getType()
2198 }
2199
2200 /// Returns the VPValue representing the value of this induction at
2201 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2202 /// take place.
2204 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2205 }
2206};
2207
2209 bool IsScalarAfterVectorization;
2210
2211public:
2212 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2213 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2214 /// VF*UF.
2216 VPValue *NumUnrolledElems,
2217 const InductionDescriptor &IndDesc,
2218 bool IsScalarAfterVectorization, DebugLoc DL)
2219 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2220 Step, IndDesc, DL),
2221 IsScalarAfterVectorization(IsScalarAfterVectorization) {
2222 addOperand(NumUnrolledElems);
2223 }
2224
2226
2230 getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization,
2231 getDebugLoc());
2232 }
2233
2234 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2235
2236 /// This recipe cannot be executed directly; reaching this function is a bug
/// (see the llvm_unreachable message for the expected expansion).
2237 void execute(VPTransformState &State) override {
2238 llvm_unreachable("cannot execute this recipe, should be expanded via "
2239 "expandVPWidenPointerInduction");
2240 }
2241
2242 /// Returns true if only scalar values will be generated.
2243 bool onlyScalarsGenerated(bool IsScalable);
2244
2245#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2246 /// Print the recipe.
2247 void print(raw_ostream &O, const Twine &Indent,
2248 VPSlotTracker &SlotTracker) const override;
2249#endif
2250};
2251
2252/// A recipe for widened phis. Incoming values are operands of the recipe and
2253/// their operand index corresponds to the incoming predecessor block. If the
2254/// recipe is placed in an entry block to a (non-replicate) region, it must have
2255/// exactly 2 incoming values, the first from the predecessor of the region and
2256/// the second from the exiting block of the region.
2258 public VPPhiAccessors {
2259 /// Name to use for the generated IR instruction for the widened phi.
2260 std::string Name;
2261
2262protected:
2263 const VPRecipeBase *getAsRecipe() const override { return this; }
2264
2265public:
2266 /// Create a new VPWidenPHIRecipe for \p Phi with debug location \p DL. If
2267 /// \p Start is non-null it becomes the first operand; otherwise the recipe
/// is created without operands. \p Name is copied and kept as the name to
/// use for the generated IR instruction.
2268 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2269 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2270 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2271 if (Start)
2272 addOperand(Start);
2273 }
2274
2277 getOperand(0), getDebugLoc(), Name);
2279 C->addOperand(Op);
2280 return C;
2281 }
2282
2283 ~VPWidenPHIRecipe() override = default;
2284
2285 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2286
2287 /// Generate the phi/select nodes.
2288 void execute(VPTransformState &State) override;
2289
2290#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2291 /// Print the recipe.
2292 void print(raw_ostream &O, const Twine &Indent,
2293 VPSlotTracker &SlotTracker) const override;
2294#endif
2295};
2296
2297/// A recipe for handling first-order recurrence phis. The start value is the
2298/// first operand of the recipe and the incoming value from the backedge is the
2299/// second operand.
2302 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2303
2304 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2305
2310
2311 void execute(VPTransformState &State) override;
2312
2313 /// Return the cost of this first-order recurrence phi recipe.
2315 VPCostContext &Ctx) const override;
2316
2317#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2318 /// Print the recipe.
2319 void print(raw_ostream &O, const Twine &Indent,
2320 VPSlotTracker &SlotTracker) const override;
2321#endif
2322
2323 /// Returns true if the recipe only uses the first lane of operand \p Op.
2324 bool usesFirstLaneOnly(const VPValue *Op) const override {
2326 "Op must be an operand of the recipe");
2327 return Op == getStartValue();
2328 }
2329};
2330
2331/// A recipe for handling reduction phis. The start value is the first operand
2332/// of the recipe and the incoming value from the backedge is the second
2333/// operand.
2335 public VPUnrollPartAccessor<2> {
2336 /// The recurrence kind of the reduction.
2337 const RecurKind Kind;
2338
2339 /// The phi is part of an in-loop reduction.
2340 bool IsInLoop;
2341
2342 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2343 bool IsOrdered;
2344
2345 /// When expanding the reduction PHI, the plan's VF element count is divided
2346 /// by this factor to form the reduction phi's VF.
2347 unsigned VFScaleFactor = 1;
2348
2349public:
2350 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2352 bool IsInLoop = false, bool IsOrdered = false,
2353 unsigned VFScaleFactor = 1)
2354 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2355 IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
2356 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2357 }
2358
2359 ~VPReductionPHIRecipe() override = default;
2360
2362 auto *R = new VPReductionPHIRecipe(
2364 *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
2365 R->addOperand(getBackedgeValue());
2366 return R;
2367 }
2368
2369 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2370
2371 /// Generate the phi/select nodes.
2372 void execute(VPTransformState &State) override;
2373
2374 /// Get the factor that the VF of this recipe's output should be scaled by.
2375 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2376
2377#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2378 /// Print the recipe.
2379 void print(raw_ostream &O, const Twine &Indent,
2380 VPSlotTracker &SlotTracker) const override;
2381#endif
2382
2383 /// Returns the number of incoming values, also number of incoming blocks.
2384 /// A reduction phi has two incoming values: the start value and the value
2385 /// coming from the loop backedge.
2386 unsigned getNumIncoming() const override { return 2; }
2387
2388 /// Returns the recurrence kind of the reduction.
2389 RecurKind getRecurrenceKind() const { return Kind; }
2390
2391 /// Returns true, if the phi is part of an ordered reduction.
2392 bool isOrdered() const { return IsOrdered; }
2393
2394 /// Returns true, if the phi is part of an in-loop reduction.
2395 bool isInLoop() const { return IsInLoop; }
2396
2397 /// Returns true if the recipe only uses the first lane of operand \p Op.
2398 bool usesFirstLaneOnly(const VPValue *Op) const override {
2400 "Op must be an operand of the recipe");
2401 return isOrdered() || isInLoop();
2402 }
2403};
2404
2405/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2406/// instructions.
2408public:
2409 /// The blend operation is a User of the incoming values and of their
2410 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2411 /// be omitted (implied by passing an odd number of operands) in which case
2412 /// all other incoming values are merged into it.
2414 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2415 assert(Operands.size() > 0 && "Expected at least one operand!");
2416 }
2417
2422
2423 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2424
2425 /// A normalized blend is one that has an odd number of operands, whereby the
2426 /// first operand does not have an associated mask.
2427 bool isNormalized() const { return getNumOperands() % 2; }
2428
2429 /// Return the number of incoming values, taking into account when normalized
2430 /// the first incoming value will have no mask.
2431 unsigned getNumIncomingValues() const {
2432 return (getNumOperands() + isNormalized()) / 2;
2433 }
2434
2435 /// Return incoming value number \p Idx.
2436 VPValue *getIncomingValue(unsigned Idx) const {
// With operands laid out as [I0, M0, I1, M1, ...], incoming value Idx sits
// at 2 * Idx. When M0 is omitted (normalized blend) every later incoming
// value moves down by one slot, hence the subtraction of isNormalized().
2437 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2438 }
2439
2440 /// Return mask number \p Idx. A normalized blend has no mask for index 0.
2441 VPValue *getMask(unsigned Idx) const {
2442 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
// With M0 present the mask for Idx sits at 2 * Idx + 1; in a normalized
// blend (M0 omitted) it sits one slot lower, at 2 * Idx.
2443 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2444 }
2445
2446 /// Replace mask number \p Idx with \p V. A normalized blend has no mask for
/// index 0.
2447 void setMask(unsigned Idx, VPValue *V) {
2448 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
// Mask 0 always lives in operand 1; later masks follow their incoming value.
2449 const unsigned MaskOpIdx = Idx == 0 ? 1 : Idx * 2 + !isNormalized();
setOperand(MaskOpIdx, V);
2450 }
2451
2452 void execute(VPTransformState &State) override {
2453 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2454 }
2455
2456 /// Return the cost of this VPBlendRecipe.
2457 InstructionCost computeCost(ElementCount VF,
2458 VPCostContext &Ctx) const override;
2459
2460#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2461 /// Print the recipe.
2462 void print(raw_ostream &O, const Twine &Indent,
2463 VPSlotTracker &SlotTracker) const override;
2464#endif
2465
2466 /// Returns true if the recipe only uses the first lane of operand \p Op.
2467 bool usesFirstLaneOnly(const VPValue *Op) const override {
2469 "Op must be an operand of the recipe");
2470 // Recursing through Blend recipes only, must terminate at header phis at the
2471 // latest.
2472 return all_of(users(),
2473 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2474 }
2475};
2476
2477/// A common base class for interleaved memory operations.
2478/// An Interleaved memory operation is a memory access method that combines
2479/// multiple strided loads/stores into a single wide load/store with shuffles.
2480/// The first operand is the start address. The optional operands are, in order,
2481/// the stored values and the mask.
2483 public VPIRMetadata {
2485
2486 /// Indicates if the interleave group is in a conditional block and requires a
2487 /// mask.
2488 bool HasMask = false;
2489
2490 /// Indicates if gaps between members of the group need to be masked out or if
2491 /// unused gaps can be loaded speculatively.
2492 bool NeedsMaskForGaps = false;
2493
2494protected:
2495 VPInterleaveBase(const unsigned char SC,
2497 ArrayRef<VPValue *> Operands,
2498 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2499 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2500 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2501 NeedsMaskForGaps(NeedsMaskForGaps) {
2502 // TODO: extend the masked interleaved-group support to reversed access.
2503 assert((!Mask || !IG->isReverse()) &&
2504 "Reversed masked interleave-group not supported.");
2505 for (unsigned I = 0; I < IG->getFactor(); ++I)
2506 if (Instruction *Inst = IG->getMember(I)) {
2507 if (Inst->getType()->isVoidTy())
2508 continue;
2509 new VPValue(Inst, this);
2510 }
2511
2512 for (auto *SV : StoredValues)
2513 addOperand(SV);
2514 if (Mask) {
2515 HasMask = true;
2516 addOperand(Mask);
2517 }
2518 }
2519
2520public:
2521 VPInterleaveBase *clone() override = 0;
2522
2523 static inline bool classof(const VPRecipeBase *R) {
2524 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2525 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2526 }
2527
2528 static inline bool classof(const VPUser *U) {
2529 auto *R = dyn_cast<VPRecipeBase>(U);
2530 return R && classof(R);
2531 }
2532
2533 /// Return the address accessed by this recipe.
2534 VPValue *getAddr() const {
2535 return getOperand(0); // Address is the 1st, mandatory operand.
2536 }
2537
2538 /// Return the mask used by this recipe. Note that a full mask is represented
2539 /// by a nullptr.
2540 VPValue *getMask() const {
2541 // Mask is optional and the last operand.
2542 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2543 }
2544
2545 /// Return true if the access needs a mask because of the gaps.
2546 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2547
2549
/// Returns the insert position of the interleave group, as reported by
/// IG->getInsertPos().
2550 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2551
/// The base class itself is not executable; the concrete interleave recipes
/// provide their own execute() overrides.
2552 void execute(VPTransformState &State) override {
2553 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2554 }
2555
2556 /// Return the cost of this recipe.
2557 InstructionCost computeCost(ElementCount VF,
2558 VPCostContext &Ctx) const override;
2559
2560 /// Returns true if the recipe only uses the first lane of operand \p Op.
2561 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2562
2563 /// Returns the number of stored operands of this interleave group. Returns 0
2564 /// for load interleave groups.
2565 virtual unsigned getNumStoreOperands() const = 0;
2566
2567 /// Return the VPValues stored by this interleave group. If it is a load
2568 /// interleave group, return an empty ArrayRef.
2570 return ArrayRef<VPValue *>(op_end() -
2571 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2573 }
2574};
2575
2576/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2577/// or stores into one wide load/store and shuffles. The first operand of a
2578/// VPInterleave recipe is the address, followed by the stored values, followed
2579/// by an optional mask.
2581public:
2583 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2584 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2585 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2586 NeedsMaskForGaps, MD, DL) {}
2587
2588 ~VPInterleaveRecipe() override = default;
2589
2593 needsMaskForGaps(), *this, getDebugLoc());
2594 }
2595
2596 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2597
2598 /// Generate the wide load or store, and shuffles.
2599 void execute(VPTransformState &State) override;
2600
2601#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2602 /// Print the recipe.
2603 void print(raw_ostream &O, const Twine &Indent,
2604 VPSlotTracker &SlotTracker) const override;
2605#endif
2606
2607 bool usesFirstLaneOnly(const VPValue *Op) const override {
2609 "Op must be an operand of the recipe");
2610 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2611 }
2612
/// Returns the number of stored values: all operands except the address
/// and, if present, the trailing mask.
2613 unsigned getNumStoreOperands() const override {
2614 return getNumOperands() - (getMask() ? 2 : 1);
2615 }
2616};
2617
2618/// A recipe for interleaved memory operations with vector-predication
2619/// intrinsics. The first operand is the address, the second operand is the
2620/// explicit vector length. Stored values and mask are optional operands.
2622public:
2624 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2625 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2626 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2627 R.getDebugLoc()) {
2628 assert(!getInterleaveGroup()->isReverse() &&
2629 "Reversed interleave-group with tail folding is not supported.");
2630 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2631 "supported for scalable vector.");
2632 }
2633
2634 ~VPInterleaveEVLRecipe() override = default;
2635
2637 llvm_unreachable("cloning not implemented yet");
2638 }
2639
2640 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2641
2642 /// The VPValue of the explicit vector length.
2643 VPValue *getEVL() const { return getOperand(1); }
2644
2645 /// Generate the wide load or store, and shuffles.
2646 void execute(VPTransformState &State) override;
2647
2648#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2649 /// Print the recipe.
2650 void print(raw_ostream &O, const Twine &Indent,
2651 VPSlotTracker &SlotTracker) const override;
2652#endif
2653
2654 /// The recipe only uses the first lane of the address, and EVL operand.
2655 bool usesFirstLaneOnly(const VPValue *Op) const override {
2657 "Op must be an operand of the recipe");
2658 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2659 Op == getEVL();
2660 }
2661
/// Returns the number of stored values: all operands except the address,
/// the EVL and, if present, the trailing mask.
2662 unsigned getNumStoreOperands() const override {
2663 return getNumOperands() - (getMask() ? 3 : 2);
2664 }
2665};
2666
2667/// A recipe to represent inloop reduction operations, performing a reduction on
2668/// a vector operand into a scalar value, and adding the result to a chain.
2669/// The Operands are {ChainOp, VecOp, [Condition]}.
2671 /// The recurrence kind for the reduction in question.
2672 RecurKind RdxKind;
2673 bool IsOrdered;
2674 /// Whether the reduction is conditional.
2675 bool IsConditional = false;
2676
2677protected:
2678 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2680 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2681 bool IsOrdered, DebugLoc DL)
2682 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2683 IsOrdered(IsOrdered) {
2684 if (CondOp) {
2685 IsConditional = true;
2686 addOperand(CondOp);
2687 }
2689 }
2690
2691public:
2693 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2694 bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2695 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2696 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2697 IsOrdered, DL) {}
2698
2700 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2701 bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2702 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2703 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2704 IsOrdered, DL) {}
2705
2706 ~VPReductionRecipe() override = default;
2707
2709 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2711 getCondOp(), IsOrdered, getDebugLoc());
2712 }
2713
2714 static inline bool classof(const VPRecipeBase *R) {
2715 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2716 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
2717 R->getVPDefID() == VPRecipeBase::VPPartialReductionSC;
2718 }
2719
2720 static inline bool classof(const VPUser *U) {
2721 auto *R = dyn_cast<VPRecipeBase>(U);
2722 return R && classof(R);
2723 }
2724
2725 static inline bool classof(const VPValue *VPV) {
2726 const VPRecipeBase *R = VPV->getDefiningRecipe();
2727 return R && classof(R);
2728 }
2729
2730 static inline bool classof(const VPSingleDefRecipe *R) {
2731 return classof(static_cast<const VPRecipeBase *>(R));
2732 }
2733
2734 /// Generate the reduction in the loop.
2735 void execute(VPTransformState &State) override;
2736
2737 /// Return the cost of VPReductionRecipe.
2738 InstructionCost computeCost(ElementCount VF,
2739 VPCostContext &Ctx) const override;
2740
2741#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2742 /// Print the recipe.
2743 void print(raw_ostream &O, const Twine &Indent,
2744 VPSlotTracker &SlotTracker) const override;
2745#endif
2746
2747 /// Return the recurrence kind for the in-loop reduction.
2748 RecurKind getRecurrenceKind() const { return RdxKind; }
2749 /// Return true if the in-loop reduction is ordered.
2750 bool isOrdered() const { return IsOrdered; }
2751 /// Return true if the in-loop reduction is conditional, i.e. the recipe was
/// created with a condition operand.
2752 bool isConditional() const { return IsConditional; }
2753 /// The VPValue of the scalar Chain being accumulated.
2754 VPValue *getChainOp() const { return getOperand(0); }
2755 /// The VPValue of the vector value to be reduced.
2756 VPValue *getVecOp() const { return getOperand(1); }
2757 /// The VPValue of the condition for the block.
2759 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2760 }
2761};
2762
2763/// A recipe for forming partial reductions. In the loop, an accumulator and
2764/// vector operand are added together and passed to the next iteration as the
2765/// next accumulator. After the loop body, the accumulator is reduced to a
2766/// scalar value.
2768 unsigned Opcode;
2769
2770 /// The divisor by which the VF of this recipe's output should be divided
2771 /// during execution.
2772 unsigned VFScaleFactor;
2773
2774public:
2776 VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
2777 : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
2778 VFScaleFactor, ReductionInst) {}
2779 VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2780 VPValue *Cond, unsigned ScaleFactor,
2781 Instruction *ReductionInst = nullptr)
2782 : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
2783 FastMathFlags(), ReductionInst,
2784 ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
2785 Opcode(Opcode), VFScaleFactor(ScaleFactor) {
2786 [[maybe_unused]] auto *AccumulatorRecipe =
2788 // When cloning as part of a VPExpressionRecipe the chain op could have been
2789 // replaced by a temporary VPValue, so it doesn't have a defining recipe.
2790 assert((!AccumulatorRecipe ||
2791 isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2792 isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2793 "Unexpected operand order for partial reduction recipe");
2794 }
2795 ~VPPartialReductionRecipe() override = default;
2796
2798 return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
2799 getCondOp(), VFScaleFactor,
2801 }
2802
2803 VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2804
2805 /// Generate the reduction in the loop.
2806 void execute(VPTransformState &State) override;
2807
2808 /// Return the cost of this VPPartialReductionRecipe.
2810 VPCostContext &Ctx) const override;
2811
2812 /// Get the binary op's opcode.
2813 unsigned getOpcode() const { return Opcode; }
2814
2815 /// Get the factor that the VF of this recipe's output should be scaled by.
2816 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2817
2818#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2819 /// Print the recipe.
2820 void print(raw_ostream &O, const Twine &Indent,
2821 VPSlotTracker &SlotTracker) const override;
2822#endif
2823};
2824
2825/// A recipe to represent inloop reduction operations with vector-predication
2826/// intrinsics, performing a reduction on a vector operand with the explicit
2827/// vector length (EVL) into a scalar value, and adding the result to a chain.
2828/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2830public:
2834 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2835 R.getFastMathFlags(),
2837 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2838 R.isOrdered(), DL) {}
2839
2840 ~VPReductionEVLRecipe() override = default;
2841
2843 llvm_unreachable("cloning not implemented yet");
2844 }
2845
2846 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2847
2848 /// Generate the reduction in the loop.
2849 void execute(VPTransformState &State) override;
2850
2851#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2852 /// Print the recipe.
2853 void print(raw_ostream &O, const Twine &Indent,
2854 VPSlotTracker &SlotTracker) const override;
2855#endif
2856
2857 /// The VPValue of the explicit vector length.
2858 VPValue *getEVL() const { return getOperand(2); }
2859
2860 /// Returns true if the recipe only uses the first lane of operand \p Op.
2861 bool usesFirstLaneOnly(const VPValue *Op) const override {
2863 "Op must be an operand of the recipe");
2864 return Op == getEVL();
2865 }
2866};
2867
2868/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2869/// copies of the original scalar type, one per lane, instead of producing a
2870/// single copy of widened type for all lanes. If the instruction is known to be
2871/// a single scalar, only one copy, per lane zero, will be generated.
2873 public VPIRMetadata {
2874 /// Indicator if only a single scalar copy (for lane zero) is needed instead
/// of one replica per lane.
2875 bool IsSingleScalar;
2876
2877 /// Indicator if the replicas are also predicated.
2878 bool IsPredicated;
2879
2880public:
2882 bool IsSingleScalar, VPValue *Mask = nullptr,
2883 VPIRMetadata Metadata = {})
2884 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2885 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2886 IsPredicated(Mask) {
// A non-null mask marks the recipe as predicated. The mask is appended
// after the regular operands, so it is always the last operand.
2887 if (Mask)
2888 addOperand(Mask);
2889 }
2890
2891 ~VPReplicateRecipe() override = default;
2892
// Build a copy from the same underlying instruction, operands,
// single-scalar setting and metadata, then transfer this recipe's flags.
// NOTE(review): for a predicated recipe operands() presumably already ends
// with the mask, and the constructor appends the mask once more -- confirm
// that the duplicated trailing operand is intended.
2894 auto *Copy =
2895 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar,
2896 isPredicated() ? getMask() : nullptr, *this);
2897 Copy->transferFlags(*this);
2898 return Copy;
2899 }
2900
2901 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2902
2903 /// Generate replicas of the desired Ingredient. Replicas will be generated
2904 /// for all parts and lanes unless a specific part and lane are specified in
2905 /// the \p State.
2906 void execute(VPTransformState &State) override;
2907
2908 /// Return the cost of this VPReplicateRecipe.
2909 InstructionCost computeCost(ElementCount VF,
2910 VPCostContext &Ctx) const override;
2911
2912#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2913 /// Print the recipe.
2914 void print(raw_ostream &O, const Twine &Indent,
2915 VPSlotTracker &SlotTracker) const override;
2916#endif
2917
/// Returns true if only a single scalar copy is generated by this recipe.
2918 bool isSingleScalar() const { return IsSingleScalar; }
2919
/// Returns true if the recipe was constructed with a mask, i.e. its replicas
/// are predicated.
2920 bool isPredicated() const { return IsPredicated; }
2921
2922 /// Returns true if the recipe only uses the first lane of operand \p Op.
/// This is the case for every operand exactly when the recipe is a single
/// scalar.
2923 bool usesFirstLaneOnly(const VPValue *Op) const override {
2925 "Op must be an operand of the recipe");
2926 return isSingleScalar();
2927 }
2928
2929 /// Returns true if the recipe uses scalars of operand \p Op.
2930 bool usesScalars(const VPValue *Op) const override {
2932 "Op must be an operand of the recipe");
2933 return true;
2934 }
2935
2936 /// Returns true if the recipe is used by a widened recipe via an intervening
2937 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2938 /// in a vector.
2939 bool shouldPack() const;
2940
2941 /// Return the mask of a predicated VPReplicateRecipe.
/// The mask is stored as the last operand; calling this on an unpredicated
/// recipe trips the assertion below.
2943 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2944 return getOperand(getNumOperands() - 1);
2945 }
2946
/// Return the opcode of the underlying instruction.
2947 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2948};
2949
2950/// A recipe for generating conditional branches on the bits of a mask.
2952public:
// The block-in mask is the only operand of the recipe.
2954 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
2955
2958 }
2959
2960 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2961
2962 /// Generate the extraction of the appropriate bit from the block mask and the
2963 /// conditional branch.
2964 void execute(VPTransformState &State) override;
2965
2966 /// Return the cost of this VPBranchOnMaskRecipe.
2967 InstructionCost computeCost(ElementCount VF,
2968 VPCostContext &Ctx) const override;
2969
2970#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2971 /// Print the recipe.
/// The output starts with \p Indent followed by the "BRANCH-ON-MASK " tag.
2972 void print(raw_ostream &O, const Twine &Indent,
2973 VPSlotTracker &SlotTracker) const override {
2974 O << Indent << "BRANCH-ON-MASK ";
2976 }
2977#endif
2978
2979 /// Returns true if the recipe uses scalars of operand \p Op.
/// Always true for this recipe.
2980 bool usesScalars(const VPValue *Op) const override {
2982 "Op must be an operand of the recipe");
2983 return true;
2984 }
2985};
2986
2987/// A recipe to combine multiple recipes into a single 'expression' recipe,
2988/// which should be considered a single entity for cost-modeling and transforms.
2989/// The recipe needs to be 'decomposed', i.e. replaced by its individual
2990/// expression recipes, before execute. The individual expression recipes are
2991/// completely disconnected from the def-use graph of other recipes not part of
2992/// the expression. Def-use edges between pairs of expression recipes remain
2993/// intact, whereas every edge between an expression recipe and a recipe outside
2994/// the expression is elevated to connect the non-expression recipe with the
2995/// VPExpressionRecipe itself.
2996class VPExpressionRecipe : public VPSingleDefRecipe {
2997 /// Recipes included in this VPExpressionRecipe. This could contain
2998 /// duplicates.
2999 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3000
3001 /// Temporary VPValues used for external operands of the expression, i.e.
3002 /// operands not defined by recipes in the expression.
3003 SmallVector<VPValue *> LiveInPlaceholders;
3004
/// The kinds of expressions that can be bundled into a VPExpressionRecipe.
3005 enum class ExpressionTypes {
3006 /// Represents an inloop extended reduction operation, performing a
3007 /// reduction on an extended vector operand into a scalar value, and adding
3008 /// the result to a chain.
3009 ExtendedReduction,
3010 /// Represent an inloop multiply-accumulate reduction, multiplying the
3011 /// extended vector operands, performing a reduction.add on the result, and
3012 /// adding the scalar result to a chain.
3013 ExtMulAccReduction,
3014 /// Represent an inloop multiply-accumulate reduction, multiplying the
3015 /// vector operands, performing a reduction.add on the result, and adding
3016 /// the scalar result to a chain.
3017 MulAccReduction,
3018 /// Represent an inloop multiply-accumulate reduction, multiplying the
3019 /// extended vector operands, negating the multiplication, performing a
3020 /// reduction.add on the result, and adding the scalar result to a chain.
3021 ExtNegatedMulAccReduction,
3022 };
3023
3024 /// Type of the expression.
3025 ExpressionTypes ExpressionType;
3026
3027 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3028 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3029 /// in the expression) are replaced by temporary VPValues and the original
3030 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3031 /// as needed (excluding last) to ensure they are only used by other recipes
3032 /// in the expression.
3033 VPExpressionRecipe(ExpressionTypes ExpressionType,
3034 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3035
3036public:
// The public constructors below each bundle a fixed recipe pattern and
// delegate to the private constructor with the matching ExpressionTypes.
3038 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3040 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3043 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3044 {Ext0, Ext1, Mul, Red}) {}
3047 VPReductionRecipe *Red)
3048 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3049 {Ext0, Ext1, Mul, Sub, Red}) {
// Sanity-check the negated multiply-accumulate pattern: a mul feeding a
// sub, reduced by an add reduction, where operand 2 of the expression is a
// live-in constant zero.
3050 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3051 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3052 "Expected an add reduction");
3053 assert(getNumOperands() >= 3 && "Expected at least three operands");
3054 [[maybe_unused]] auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
3055 assert(SubConst && SubConst->getValue() == 0 &&
3056 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3057 }
3058
// ExpressionRecipes may contain duplicates, so remember which recipes were
// already seen and delete each one exactly once (walking the list in
// reverse). The placeholder values are deleted afterwards.
3060 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3061 for (auto *R : reverse(ExpressionRecipes)) {
3062 if (ExpressionRecipesSeen.insert(R).second)
3063 delete R;
3064 }
3065 for (VPValue *T : LiveInPlaceholders)
3066 delete T;
3067 }
3068
3069 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3070
/// Create a copy of this expression: every contained recipe is cloned, the
/// clones are rewired to use each other instead of the originals, and a new
/// VPExpressionRecipe of the same type is built from them.
3071 VPExpressionRecipe *clone() override {
3072 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3073 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3074 for (auto *R : ExpressionRecipes)
3075 NewExpressiondRecipes.push_back(R->clone());
3076 for (auto *New : NewExpressiondRecipes) {
// Redirect uses of each original recipe to the clone at the same index.
3077 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3078 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3079 // Update placeholder operands in the cloned recipe to use the external
3080 // operands, to be internalized when the cloned expression is constructed.
3081 for (const auto &[Placeholder, OutsideOp] :
3082 zip(LiveInPlaceholders, operands()))
3083 New->replaceUsesOfWith(Placeholder, OutsideOp);
3084 }
3085 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3086 }
3087
3088 /// Return the VPValue to use to infer the result type of the recipe.
/// This is the second-to-last operand if the final reduction recipe is
/// conditional, and the last operand otherwise.
3090 unsigned OpIdx =
3091 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3092 : 1;
3093 return getOperand(getNumOperands() - OpIdx);
3094 }
3095
3096 /// Insert the recipes of the expression back into the VPlan, directly before
3097 /// the current recipe. Leaves the expression recipe empty, which must be
3098 /// removed before codegen.
3099 void decompose();
3100
/// Return the VF scale factor of the last recipe of the expression if it is a
/// VPPartialReductionRecipe, and 1 otherwise.
3101 unsigned getVFScaleFactor() const {
3102 auto *PR = dyn_cast<VPPartialReductionRecipe>(ExpressionRecipes.back());
3103 return PR ? PR->getVFScaleFactor() : 1;
3104 }
3105
3106 /// Method for generating code, must not be called as this recipe is abstract.
3107 void execute(VPTransformState &State) override {
3108 llvm_unreachable("recipe must be removed before execute");
3109 }
3110
3112 VPCostContext &Ctx) const override;
3113
3114#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3115 /// Print the recipe.
3116 void print(raw_ostream &O, const Twine &Indent,
3117 VPSlotTracker &SlotTracker) const override;
3118#endif
3119
3120 /// Returns true if this expression contains recipes that may read from or
3121 /// write to memory.
3122 bool mayReadOrWriteMemory() const;
3123
3124 /// Returns true if this expression contains recipes that may have side
3125 /// effects.
3126 bool mayHaveSideEffects() const;
3127
3128 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3129 bool isSingleScalar() const;
3130};
3131
3132/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3133/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3134/// order to merge values that are set under such a branch and feed their uses.
3135/// The phi nodes can be scalar or vector depending on the users of the value.
3136/// This recipe works in concert with VPBranchOnMaskRecipe.
3138public:
3139 /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs phi
3140 /// nodes after merging back from a Branch-on-Mask.
3142 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3143 ~VPPredInstPHIRecipe() override = default;
3144
// The copy uses the same incoming value (operand 0) and debug location.
3146 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3147 }
3148
3149 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3150
3151 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3152 /// retain SSA form.
3153 void execute(VPTransformState &State) override;
3154
3155 /// Return the cost of this VPPredInstPHIRecipe.
/// Currently always 0, see the TODO below.
3157 VPCostContext &Ctx) const override {
3158 // TODO: Compute accurate cost after retiring the legacy cost model.
3159 return 0;
3160 }
3161
3162#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3163 /// Print the recipe.
3164 void print(raw_ostream &O, const Twine &Indent,
3165 VPSlotTracker &SlotTracker) const override;
3166#endif
3167
3168 /// Returns true if the recipe uses scalars of operand \p Op.
/// Always true for this recipe.
3169 bool usesScalars(const VPValue *Op) const override {
3171 "Op must be an operand of the recipe");
3172 return true;
3173 }
3174};
3175
3176/// A common base class for widening memory operations. An optional mask can be
3177/// provided as the last operand.
3179 public VPIRMetadata {
3180protected:
3182
3183 /// Alignment information for this memory access.
3185
3186 /// Whether the accessed addresses are consecutive.
3188
3189 /// Whether the consecutive accessed addresses are in reverse order.
3191
3192 /// Whether the memory access is masked.
3193 bool IsMasked = false;
3194
/// Append \p Mask as the last operand and mark the access as masked. A null
/// \p Mask is ignored and leaves the recipe unmasked. Must not be called
/// again once a mask has been set.
3195 void setMask(VPValue *Mask) {
3196 assert(!IsMasked && "cannot re-set mask");
3197 if (!Mask)
3198 return;
3199 addOperand(Mask);
3200 IsMasked = true;
3201 }
3202
/// Create a widened memory recipe with recipe ID \p SC for the ingredient
/// \p I, using \p Operands, \p Metadata and debug location \p DL. \p Reverse
/// may only be set together with \p Consecutive.
3203 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3204 std::initializer_list<VPValue *> Operands,
3205 bool Consecutive, bool Reverse,
3206 const VPIRMetadata &Metadata, DebugLoc DL)
3207 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3209 Reverse(Reverse) {
3210 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3212 "Reversed acccess without VPVectorEndPointerRecipe address?");
3213 }
3214
3215public:
// Cloning is not supported on the common base class.
3217 llvm_unreachable("cloning not supported");
3218 }
3219
/// Method to support type inquiry through isa, cast, and dyn_cast. Matches
/// the widened load and store recipes as well as their EVL variants.
3220 static inline bool classof(const VPRecipeBase *R) {
3221 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3222 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3223 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3224 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3225 }
3226
/// Same as above for a VPUser: true if \p U is a recipe accepted by the
/// VPRecipeBase overload.
3227 static inline bool classof(const VPUser *U) {
3228 auto *R = dyn_cast<VPRecipeBase>(U);
3229 return R && classof(R);
3230 }
3231
3232 /// Return whether the loaded-from / stored-to addresses are consecutive.
3233 bool isConsecutive() const { return Consecutive; }
3234
3235 /// Return whether the consecutive loaded/stored addresses are in reverse
3236 /// order.
3237 bool isReverse() const { return Reverse; }
3238
3239 /// Return the address accessed by this recipe.
/// The address is always operand 0.
3240 VPValue *getAddr() const { return getOperand(0); }
3241
3242 /// Returns true if the recipe is masked.
3243 bool isMasked() const { return IsMasked; }
3244
3245 /// Return the mask used by this recipe. Note that a full mask is represented
3246 /// by a nullptr.
3247 VPValue *getMask() const {
3248 // Mask is optional and therefore the last operand.
3249 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3250 }
3251
3252 /// Returns the alignment of the memory access.
3253 Align getAlign() const { return Alignment; }
3254
3255 /// Generate the wide load/store.
/// Must not be called on the base class itself; doing so is unreachable.
3256 void execute(VPTransformState &State) override {
3257 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3258 }
3259
3260 /// Return the cost of this VPWidenMemoryRecipe.
3261 InstructionCost computeCost(ElementCount VF,
3262 VPCostContext &Ctx) const override;
3263
3265};
3266
3267/// A recipe for widening load operations, using the address to load from and an
3268/// optional mask.
3270 public VPValue {
3272 bool Consecutive, bool Reverse,
3273 const VPIRMetadata &Metadata, DebugLoc DL)
3274 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3275 Reverse, Metadata, DL),
3276 VPValue(this, &Load) {
// The address is the only fixed operand; a non-null mask is appended after
// it.
3277 setMask(Mask);
3278 }
3279
3282 getMask(), Consecutive, Reverse, *this,
3283 getDebugLoc());
3284 }
3285
3286 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3287
3288 /// Generate a wide load or gather.
3289 void execute(VPTransformState &State) override;
3290
3291#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3292 /// Print the recipe.
3293 void print(raw_ostream &O, const Twine &Indent,
3294 VPSlotTracker &SlotTracker) const override;
3295#endif
3296
3297 /// Returns true if the recipe only uses the first lane of operand \p Op.
3298 bool usesFirstLaneOnly(const VPValue *Op) const override {
3300 "Op must be an operand of the recipe");
3301 // Widened, consecutive loads operations only demand the first lane of
3302 // their address.
3303 return Op == getAddr() && isConsecutive();
3304 }
3305};
3306
3307/// A recipe for widening load operations with vector-predication intrinsics,
3308/// using the address to load from, the explicit vector length and an optional
3309/// mask.
3310struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
// Built from an existing widened load \p L: ingredient, consecutive/reverse
// settings, metadata and debug location are taken from \p L. The operands
// are the address, the EVL and, if non-null, the mask.
3312 VPValue *Mask)
3313 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3314 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3315 L.getDebugLoc()),
3316 VPValue(this, &getIngredient()) {
3317 setMask(Mask);
3318 }
3319
3320 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3321
3322 /// Return the EVL operand.
/// The EVL is operand 1, directly after the address.
3323 VPValue *getEVL() const { return getOperand(1); }
3324
3325 /// Generate the wide load or gather.
3326 void execute(VPTransformState &State) override;
3327
3328 /// Return the cost of this VPWidenLoadEVLRecipe.
3330 VPCostContext &Ctx) const override;
3331
3332#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3333 /// Print the recipe.
3334 void print(raw_ostream &O, const Twine &Indent,
3335 VPSlotTracker &SlotTracker) const override;
3336#endif
3337
3338 /// Returns true if the recipe only uses the first lane of operand \p Op.
3339 bool usesFirstLaneOnly(const VPValue *Op) const override {
3341 "Op must be an operand of the recipe");
3342 // Widened loads only demand the first lane of EVL and consecutive loads
3343 // only demand the first lane of their address.
3344 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3345 }
3346};
3347
3348/// A recipe for widening store operations, using the stored value, the address
3349/// to store to and an optional mask.
/// Create a widened store of \p StoredVal to \p Addr. The operands are the
/// address, the stored value and, if non-null, the mask.
3351 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3352 VPValue *Mask, bool Consecutive, bool Reverse,
3353 const VPIRMetadata &Metadata, DebugLoc DL)
3354 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3355 Consecutive, Reverse, Metadata, DL) {
3356 setMask(Mask);
3357 }
3358
3364
3365 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3366
3367 /// Return the value stored by this recipe.
/// The stored value is operand 1, directly after the address.
3368 VPValue *getStoredValue() const { return getOperand(1); }
3369
3370 /// Generate a wide store or scatter.
3371 void execute(VPTransformState &State) override;
3372
3373#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3374 /// Print the recipe.
3375 void print(raw_ostream &O, const Twine &Indent,
3376 VPSlotTracker &SlotTracker) const override;
3377#endif
3378
3379 /// Returns true if the recipe only uses the first lane of operand \p Op.
3380 bool usesFirstLaneOnly(const VPValue *Op) const override {
3382 "Op must be an operand of the recipe");
3383 // Widened, consecutive stores only demand the first lane of their address,
3384 // unless the same operand is also stored.
3385 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3386 }
3387};
3388
3389/// A recipe for widening store operations with vector-predication intrinsics,
3390/// using the value to store, the address to store to, the explicit vector
3391/// length and an optional mask.
// Built from an existing widened store \p S: ingredient, stored value,
// consecutive/reverse settings, metadata and debug location are taken from
// \p S. The operands are the address, the stored value, the EVL and, if
// non-null, the mask.
3394 VPValue *Mask)
3395 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3396 {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
3397 S.isReverse(), S, S.getDebugLoc()) {
3398 setMask(Mask);
3399 }
3400
3401 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3402
3403 /// Return the value stored by this recipe.
3404 VPValue *getStoredValue() const { return getOperand(1); }
3405
3406 /// Return the EVL operand.
3407 VPValue *getEVL() const { return getOperand(2); }
3408
3409 /// Generate the wide store or scatter.
3410 void execute(VPTransformState &State) override;
3411
3412 /// Return the cost of this VPWidenStoreEVLRecipe.
3414 VPCostContext &Ctx) const override;
3415
3416#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3417 /// Print the recipe.
3418 void print(raw_ostream &O, const Twine &Indent,
3419 VPSlotTracker &SlotTracker) const override;
3420#endif
3421
3422 /// Returns true if the recipe only uses the first lane of operand \p Op.
3423 bool usesFirstLaneOnly(const VPValue *Op) const override {
3425 "Op must be an operand of the recipe");
// Only the first lane of the EVL is used; the EVL is not expected to be
// the stored value as well.
3426 if (Op == getEVL()) {
3427 assert(getStoredValue() != Op && "unexpected store of EVL");
3428 return true;
3429 }
3430 // Widened, consecutive memory operations only demand the first lane of
3431 // their address, unless the same operand is also stored. That latter can
3432 // happen with opaque pointers.
3433 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3434 }
3435};
3436
3437/// Recipe to expand a SCEV expression.
/// The SCEV expression to expand. The recipe itself has no operands.
3439 const SCEV *Expr;
3440
3441public:
3443 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3444
3445 ~VPExpandSCEVRecipe() override = default;
3446
/// Create a new recipe for the same SCEV expression.
3447 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3448
3449 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3450
/// Must not be called: the expression has to be expanded before the final
/// execute.
3451 void execute(VPTransformState &State) override {
3452 llvm_unreachable("SCEV expressions must be expanded before final execute");
3453 }
3454
3455 /// Return the cost of this VPExpandSCEVRecipe.
/// Currently always 0, see the TODO below.
3457 VPCostContext &Ctx) const override {
3458 // TODO: Compute accurate cost after retiring the legacy cost model.
3459 return 0;
3460 }
3461
3462#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3463 /// Print the recipe.
3464 void print(raw_ostream &O, const Twine &Indent,
3465 VPSlotTracker &SlotTracker) const override;
3466#endif
3467
/// Return the SCEV expression expanded by this recipe.
3468 const SCEV *getSCEV() const { return Expr; }
3469};
3470
3471/// Canonical scalar induction phi of the vector loop. Starting at the specified
3472/// start value (either 0 or the resume value when vectorizing the epilogue
3473/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3474/// canonical induction variable.
3476public:
3478 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3479
3480 ~VPCanonicalIVPHIRecipe() override = default;
3481
// The copy starts from the same start value (operand 0) and debug location;
// the backedge value is added afterwards as an extra operand.
3483 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3484 R->addOperand(getBackedgeValue());
3485 return R;
3486 }
3487
3488 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3489
/// Must not be called: the recipe has to be replaced by a scalar phi recipe
/// before execution.
3490 void execute(VPTransformState &State) override {
3491 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3492 "scalar phi recipe");
3493 }
3494
3495#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3496 /// Print the recipe.
3497 void print(raw_ostream &O, const Twine &Indent,
3498 VPSlotTracker &SlotTracker) const override;
3499#endif
3500
3501 /// Returns the scalar type of the induction.
/// The type is taken from the live-in IR value of the start value.
3503 return getStartValue()->getLiveInIRValue()->getType();
3504 }
3505
3506 /// Returns true if the recipe only uses the first lane of operand \p Op.
/// Always true for this recipe.
3507 bool usesFirstLaneOnly(const VPValue *Op) const override {
3509 "Op must be an operand of the recipe");
3510 return true;
3511 }
3512
3513 /// Returns true if the recipe only uses the first part of operand \p Op.
/// Always true for this recipe.
3514 bool usesFirstPartOnly(const VPValue *Op) const override {
3516 "Op must be an operand of the recipe");
3517 return true;
3518 }
3519
3520 /// Return the cost of this VPCanonicalIVPHIRecipe.
3522 VPCostContext &Ctx) const override {
3523 // For now, match the behavior of the legacy cost model.
3524 return 0;
3525 }
3526};
3527
3528/// A recipe for generating the active lane mask for the vector loop that is
3529/// used to predicate the vector operations.
3530/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3531/// remove VPActiveLaneMaskPHIRecipe.
3533public:
3535 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3536 DL) {}
3537
3538 ~VPActiveLaneMaskPHIRecipe() override = default;
3539
// Carry over the second operand only if it has already been added to this
// recipe (presumably the backedge value -- confirm).
3542 if (getNumOperands() == 2)
3543 R->addOperand(getOperand(1));
3544 return R;
3545 }
3546
3547 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3548
3549 /// Generate the active lane mask phi of the vector loop.
3550 void execute(VPTransformState &State) override;
3551
3552#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3553 /// Print the recipe.
3554 void print(raw_ostream &O, const Twine &Indent,
3555 VPSlotTracker &SlotTracker) const override;
3556#endif
3557};
3558
3559/// A recipe for generating the phi node for the current index of elements,
3560/// adjusted in accordance with EVL value. It starts at the start value of the
3561/// canonical induction and gets incremented by EVL in each iteration of the
3562/// vector loop.
3564public:
3566 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3567
3568 ~VPEVLBasedIVPHIRecipe() override = default;
3569
// Cloning is not implemented for this recipe.
3571 llvm_unreachable("cloning not implemented yet");
3572 }
3573
3574 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3575
/// Must not be called: the recipe has to be replaced by a scalar phi recipe
/// before execution.
3576 void execute(VPTransformState &State) override {
3577 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3578 "scalar phi recipe");
3579 }
3580
3581 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3583 VPCostContext &Ctx) const override {
3584 // For now, match the behavior of the legacy cost model.
3585 return 0;
3586 }
3587
3588 /// Returns true if the recipe only uses the first lane of operand \p Op.
/// Always true for this recipe.
3589 bool usesFirstLaneOnly(const VPValue *Op) const override {
3591 "Op must be an operand of the recipe");
3592 return true;
3593 }
3594
3595#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3596 /// Print the recipe.
3597 void print(raw_ostream &O, const Twine &Indent,
3598 VPSlotTracker &SlotTracker) const override;
3599#endif
3600};
3601
3602/// A Recipe for widening the canonical induction variable of the vector loop.
3604 public VPUnrollPartAccessor<1> {
3605public:
// The canonical IV is the only operand of the recipe.
3607 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3608
3609 ~VPWidenCanonicalIVRecipe() override = default;
3610
3615
3616 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3617
3618 /// Generate a canonical vector induction variable of the vector loop, with
3619 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3620 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3621 void execute(VPTransformState &State) override;
3622
3623 /// Return the cost of this VPWidenCanonicalIVRecipe.
/// Currently always 0, see the TODO below.
3625 VPCostContext &Ctx) const override {
3626 // TODO: Compute accurate cost after retiring the legacy cost model.
3627 return 0;
3628 }
3629
3630#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3631 /// Print the recipe.
3632 void print(raw_ostream &O, const Twine &Indent,
3633 VPSlotTracker &SlotTracker) const override;
3634#endif
3635};
3636
3637/// A recipe for converting the input value \p IV to the corresponding
3638/// value of an IV with different start and step values, using Start + IV *
3639/// Step.
3641 /// Kind of the induction.
3643 /// If not nullptr, the floating point induction binary operator. Must be set
3644 /// for floating point inductions.
3645 const FPMathOperator *FPBinOp;
3646
3647 /// Name to use for the generated IR instruction for the derived IV.
3648 std::string Name;
3649
3650public:
// Convenience constructor: takes the induction kind and the (possibly null)
// floating point binary operator from \p IndDesc and forwards to the
// constructor below.
3652 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3653 const Twine &Name = "")
3655 IndDesc.getKind(),
3656 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3657 Start, CanonicalIV, Step, Name) {}
3658
// The operands are, in order: the start value, the IV and the step value.
3660 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3661 VPValue *Step, const Twine &Name = "")
3662 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3663 FPBinOp(FPBinOp), Name(Name.str()) {}
3664
3665 ~VPDerivedIVRecipe() override = default;
3666
// The copy keeps the kind, FP operator and all three operands. No name is
// passed, so the copy uses the default (empty) name.
3668 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3669 getStepValue());
3670 }
3671
3672 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3673
3674 /// Generate the transformed value of the induction at offset StartValue (1.
3675 /// operand) + IV (2. operand) * StepValue (3. operand).
3676 void execute(VPTransformState &State) override;
3677
3678 /// Return the cost of this VPDerivedIVRecipe.
/// Currently always 0, see the TODO below.
3680 VPCostContext &Ctx) const override {
3681 // TODO: Compute accurate cost after retiring the legacy cost model.
3682 return 0;
3683 }
3684
3685#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3686 /// Print the recipe.
3687 void print(raw_ostream &O, const Twine &Indent,
3688 VPSlotTracker &SlotTracker) const override;
3689#endif
3690
// The type is taken from the live-in IR value of the start value.
3692 return getStartValue()->getLiveInIRValue()->getType();
3693 }
3694
/// Return the start value of the derived IV (operand 0).
3695 VPValue *getStartValue() const { return getOperand(0); }
/// Return the step value of the derived IV (operand 2).
3696 VPValue *getStepValue() const { return getOperand(2); }
3697
3698 /// Returns true if the recipe only uses the first lane of operand \p Op.
/// Always true for this recipe.
3699 bool usesFirstLaneOnly(const VPValue *Op) const override {
3701 "Op must be an operand of the recipe");
3702 return true;
3703 }
3704};
3705
3706/// A recipe for handling phi nodes of integer and floating-point inductions,
3707/// producing their scalar values.
3709 public VPUnrollPartAccessor<3> {
/// The binary opcode of the induction.
3710 Instruction::BinaryOps InductionOpcode;
3711
3712public:
// The operands are, in order: the IV, the step and the VF.
3715 DebugLoc DL)
3716 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3717 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3718 InductionOpcode(Opcode) {}
3719
// Convenience constructor: takes the opcode from \p IndDesc, and the
// fast-math flags from its induction binary operator if that is a floating
// point operator (default flags otherwise).
3721 VPValue *Step, VPValue *VF,
3724 IV, Step, VF, IndDesc.getInductionOpcode(),
3725 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3726 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3727 : FastMathFlags(),
3728 DL) {}
3729
3730 ~VPScalarIVStepsRecipe() override = default;
3731
// The copy keeps the three operands, the induction opcode and the debug
// location.
3733 return new VPScalarIVStepsRecipe(
3734 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3736 getDebugLoc());
3737 }
3738
3739 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3740 /// this is only accurate after the VPlan has been unrolled.
3741 bool isPart0() const { return getUnrollPart(*this) == 0; }
3742
3743 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3744
3745 /// Generate the scalarized versions of the phi node as needed by their users.
3746 void execute(VPTransformState &State) override;
3747
3748 /// Return the cost of this VPScalarIVStepsRecipe.
/// Currently always 0, see the TODO below.
3750 VPCostContext &Ctx) const override {
3751 // TODO: Compute accurate cost after retiring the legacy cost model.
3752 return 0;
3753 }
3754
3755#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3756 /// Print the recipe.
3757 void print(raw_ostream &O, const Twine &Indent,
3758 VPSlotTracker &SlotTracker) const override;
3759#endif
3760
/// Return the step value of the induction (operand 1).
3761 VPValue *getStepValue() const { return getOperand(1); }
3762
3763 /// Returns true if the recipe only uses the first lane of operand \p Op.
/// Always true for this recipe.
3764 bool usesFirstLaneOnly(const VPValue *Op) const override {
3766 "Op must be an operand of the recipe");
3767 return true;
3768 }
3769};
3770
3771/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3772/// types implementing VPPhiAccessors. Used by isa<> & co.
/// Returns true if the recipe \p f can be cast to VPPhiAccessors.
3774 static inline bool isPossible(const VPRecipeBase *f) {
3775 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3777 }
3778};
3779/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3780/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3781template <typename SrcTy>
3782struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3783
3785
3786 /// doCast is used by cast<>.
/// The concrete recipe type is selected from the VPDef ID of \p R; every ID
/// not listed explicitly is cast to VPHeaderPHIRecipe.
3787 static inline VPPhiAccessors *doCast(SrcTy R) {
// The lambda yields a const pointer for all cases; constness is removed
// once on the result.
3788 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3789 switch (R->getVPDefID()) {
3790 case VPDef::VPInstructionSC:
3791 return cast<VPPhi>(R);
3792 case VPDef::VPIRInstructionSC:
3793 return cast<VPIRPhi>(R);
3794 case VPDef::VPWidenPHISC:
3795 return cast<VPWidenPHIRecipe>(R);
3796 default:
3797 return cast<VPHeaderPHIRecipe>(R);
3798 }
3799 }());
3800 }
3801
3802 /// doCastIfPossible is used by dyn_cast<>.
/// Returns nullptr if \p f cannot be cast to VPPhiAccessors.
3803 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3804 if (!Self::isPossible(f))
3805 return nullptr;
3806 return doCast(f);
3807 }
3808};
3809template <>
3812template <>
3815
3816/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3817/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3818/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3819class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3820 friend class VPlan;
3821
3822 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
3823 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3824 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3825 if (Recipe)
3826 appendRecipe(Recipe);
3827 }
3828
3829public:
3831
3832protected:
3833 /// The VPRecipes held in the order of output instructions to generate.
3835
3836 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3837 : VPBlockBase(BlockSC, Name.str()) {}
3838
3839public:
3840 ~VPBasicBlock() override {
3841 while (!Recipes.empty())
3842 Recipes.pop_back();
3843 }
3844
3845 /// Instruction iterators...
3850
3851 //===--------------------------------------------------------------------===//
3852 /// Recipe iterator methods
3853 ///
3854 inline iterator begin() { return Recipes.begin(); }
3855 inline const_iterator begin() const { return Recipes.begin(); }
3856 inline iterator end() { return Recipes.end(); }
3857 inline const_iterator end() const { return Recipes.end(); }
3858
3859 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3860 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3861 inline reverse_iterator rend() { return Recipes.rend(); }
3862 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3863
3864 inline size_t size() const { return Recipes.size(); }
3865 inline bool empty() const { return Recipes.empty(); }
3866 inline const VPRecipeBase &front() const { return Recipes.front(); }
3867 inline VPRecipeBase &front() { return Recipes.front(); }
3868 inline const VPRecipeBase &back() const { return Recipes.back(); }
3869 inline VPRecipeBase &back() { return Recipes.back(); }
3870
3871 /// Returns a reference to the list of recipes.
3873
3874 /// Returns a pointer to a member of the recipe list.
3875 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
3876 return &VPBasicBlock::Recipes;
3877 }
3878
3879 /// Method to support type inquiry through isa, cast, and dyn_cast.
3880 static inline bool classof(const VPBlockBase *V) {
3881 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3882 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3883 }
3884
3885 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3886 assert(Recipe && "No recipe to append.");
3887 assert(!Recipe->Parent && "Recipe already in VPlan");
3888 Recipe->Parent = this;
3889 Recipes.insert(InsertPt, Recipe);
3890 }
3891
3892 /// Augment the existing recipes of a VPBasicBlock with an additional
3893 /// \p Recipe as the last recipe.
3894 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3895
3896 /// The method which generates the output IR instructions that correspond to
3897 /// this VPBasicBlock, thereby "executing" the VPlan.
3898 void execute(VPTransformState *State) override;
3899
3900 /// Return the cost of this VPBasicBlock.
3901 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
3902
3903 /// Return the position of the first non-phi node recipe in the block.
3904 iterator getFirstNonPhi();
3905
3906 /// Returns an iterator range over the PHI-like recipes in the block.
3910
3911 /// Split current block at \p SplitAt by inserting a new block between the
3912 /// current block and its successors and moving all recipes starting at
3913 /// SplitAt to the new block. Returns the new block.
3914 VPBasicBlock *splitAt(iterator SplitAt);
3915
3916 VPRegionBlock *getEnclosingLoopRegion();
3917 const VPRegionBlock *getEnclosingLoopRegion() const;
3918
3919#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3920 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3921 /// SlotTracker is used to print unnamed VPValues using consecutive numbers.
3922 ///
3923 /// Note that the numbering is applied to the whole VPlan, so printing
3924 /// individual blocks is consistent with the whole VPlan printing.
3925 void print(raw_ostream &O, const Twine &Indent,
3926 VPSlotTracker &SlotTracker) const override;
3927 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3928#endif
3929
3930 /// If the block has multiple successors, return the branch recipe terminating
3931 /// the block. If there are no or only a single successor, return nullptr;
3932 VPRecipeBase *getTerminator();
3933 const VPRecipeBase *getTerminator() const;
3934
3935 /// Returns true if the block is exiting its parent region.
3936 bool isExiting() const;
3937
3938 /// Clone the current block and its recipes, without updating the operands of
3939 /// the cloned recipes.
3940 VPBasicBlock *clone() override;
3941
3942 /// Returns the predecessor block at index \p Idx with the predecessors as per
3943 /// the corresponding plain CFG. If the block is an entry block to a region,
3944 /// the first predecessor is the single predecessor of a region, and the
3945 /// second predecessor is the exiting block of the region.
3946 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
3947
3948protected:
3949 /// Execute the recipes in the IR basic block \p BB.
3950 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3951
3952 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
3953 /// generated for this VPBB.
3954 void connectToPredecessors(VPTransformState &State);
3955
3956private:
3957 /// Create an IR BasicBlock to hold the output instructions generated by this
3958 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3959 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
3960};
3961
3962inline const VPBasicBlock *
3964 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
3965}
3966
3967/// A special type of VPBasicBlock that wraps an existing IR basic block.
3968/// Recipes of the block get added before the first non-phi instruction in the
3969/// wrapped block.
3970/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3971/// preheader block.
3972class VPIRBasicBlock : public VPBasicBlock {
3973 friend class VPlan;
3974
3975 BasicBlock *IRBB;
3976
3977 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
3978 VPIRBasicBlock(BasicBlock *IRBB)
3979 : VPBasicBlock(VPIRBasicBlockSC,
3980 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3981 IRBB(IRBB) {}
3982
3983public:
3984 ~VPIRBasicBlock() override = default;
3985
3986 static inline bool classof(const VPBlockBase *V) {
3987 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3988 }
3989
3990 /// The method which generates the output IR instructions that correspond to
3991 /// this VPBasicBlock, thereby "executing" the VPlan.
3992 void execute(VPTransformState *State) override;
3993
3994 VPIRBasicBlock *clone() override;
3995
3996 BasicBlock *getIRBasicBlock() const { return IRBB; }
3997};
3998
3999/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4000/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4001/// A VPRegionBlock may indicate that its contents are to be replicated several
4002/// times. This is designed to support predicated scalarization, in which a
4003/// scalar if-then code structure needs to be generated VF * UF times. Having
4004/// this replication indicator helps to keep a single model for multiple
4005/// candidate VF's. The actual replication takes place only once the desired VF
4006/// and UF have been determined.
4007class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4008 friend class VPlan;
4009
4010 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4011 VPBlockBase *Entry;
4012
4013 /// Hold the Single Exiting block of the SESE region modelled by the
4014 /// VPRegionBlock.
4015 VPBlockBase *Exiting;
4016
4017 /// An indicator whether this region is to generate multiple replicated
4018 /// instances of output IR corresponding to its VPBlockBases.
4019 bool IsReplicator;
4020
4021 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4022 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4023 const std::string &Name = "", bool IsReplicator = false)
4024 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4025 IsReplicator(IsReplicator) {
4026 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4027 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4028 Entry->setParent(this);
4029 Exiting->setParent(this);
4030 }
4031 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4032 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4033 IsReplicator(IsReplicator) {}
4034
4035public:
4036 ~VPRegionBlock() override = default;
4037
4038 /// Method to support type inquiry through isa, cast, and dyn_cast.
4039 static inline bool classof(const VPBlockBase *V) {
4040 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4041 }
4042
4043 const VPBlockBase *getEntry() const { return Entry; }
4044 VPBlockBase *getEntry() { return Entry; }
4045
4046 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4047 /// EntryBlock must have no predecessors.
4048 void setEntry(VPBlockBase *EntryBlock) {
4049 assert(EntryBlock->getPredecessors().empty() &&
4050 "Entry block cannot have predecessors.");
4051 Entry = EntryBlock;
4052 EntryBlock->setParent(this);
4053 }
4054
4055 const VPBlockBase *getExiting() const { return Exiting; }
4056 VPBlockBase *getExiting() { return Exiting; }
4057
4058 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4059 /// ExitingBlock must have no successors.
4060 void setExiting(VPBlockBase *ExitingBlock) {
4061 assert(ExitingBlock->getSuccessors().empty() &&
4062 "Exit block cannot have successors.");
4063 Exiting = ExitingBlock;
4064 ExitingBlock->setParent(this);
4065 }
4066
4067 /// Returns the pre-header VPBasicBlock of the loop region.
4069 assert(!isReplicator() && "should only get pre-header of loop regions");
4070 return getSinglePredecessor()->getExitingBasicBlock();
4071 }
4072
4073 /// An indicator whether this region is to generate multiple replicated
4074 /// instances of output IR corresponding to its VPBlockBases.
4075 bool isReplicator() const { return IsReplicator; }
4076
4077 /// The method which generates the output IR instructions that correspond to
4078 /// this VPRegionBlock, thereby "executing" the VPlan.
4079 void execute(VPTransformState *State) override;
4080
4081 // Return the cost of this region.
4082 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4083
4084#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4085 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4086 /// \p Indent. \p SlotTracker is used to print unnamed VPValues using
4087 /// consecutive numbers.
4088 ///
4089 /// Note that the numbering is applied to the whole VPlan, so printing
4090 /// individual regions is consistent with the whole VPlan printing.
4091 void print(raw_ostream &O, const Twine &Indent,
4092 VPSlotTracker &SlotTracker) const override;
4093 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4094#endif
4095
4096 /// Clone all blocks in the single-entry single-exit region of the block and
4097 /// their recipes without updating the operands of the cloned recipes.
4098 VPRegionBlock *clone() override;
4099
4100 /// Remove the current region from its VPlan, connecting its predecessor to
4101 /// its entry, and its exiting block to its successor.
4102 void dissolveToCFGLoop();
4103
4104 /// Returns the canonical induction recipe of the region.
4106 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4107 if (EntryVPBB->empty()) {
4108 // VPlan native path. TODO: Unify both code paths.
4109 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4110 }
4111 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4112 }
4114 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4115 }
4116
4117 /// Return the type of the canonical IV for loop regions.
4118 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4119 const Type *getCanonicalIVType() const {
4120 return getCanonicalIV()->getScalarType();
4121 }
4122};
4123
4125 return getParent()->getParent();
4126}
4127
4129 return getParent()->getParent();
4130}
4131
4132 /// VPlan models a candidate for vectorization, encoding various decisions taken
4133/// to produce efficient output IR, including which branches, basic-blocks and
4134/// output IR instructions to generate, and their cost. VPlan holds a
4135/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4136/// VPBasicBlock.
4137class VPlan {
4138 friend class VPlanPrinter;
4139 friend class VPSlotTracker;
4140
4141 /// VPBasicBlock corresponding to the original preheader. Used to place
4142 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4143 /// rest of VPlan execution.
4144 /// When this VPlan is used for the epilogue vector loop, the entry will be
4145 /// replaced by a new entry block created during skeleton creation.
4146 VPBasicBlock *Entry;
4147
4148 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4149 VPIRBasicBlock *ScalarHeader;
4150
4151 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4152 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4153 /// e.g. if the scalar epilogue always executes.
4155
4156 /// Holds the VFs applicable to this VPlan.
4158
4159 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4160 /// any UF.
4162
4163 /// Holds the name of the VPlan, for printing.
4164 std::string Name;
4165
4166 /// Represents the trip count of the original loop, for folding
4167 /// the tail.
4168 VPValue *TripCount = nullptr;
4169
4170 /// Represents the backedge taken count of the original loop, for folding
4171 /// the tail. It equals TripCount - 1.
4172 VPValue *BackedgeTakenCount = nullptr;
4173
4174 /// Represents the vector trip count.
4175 VPValue VectorTripCount;
4176
4177 /// Represents the vectorization factor of the loop.
4178 VPValue VF;
4179
4180 /// Represents the loop-invariant VF * UF of the vector loop region.
4181 VPValue VFxUF;
4182
4183 /// Holds a mapping between Values and their corresponding VPValue inside
4184 /// VPlan.
4185 Value2VPValueTy Value2VPValue;
4186
4187 /// Contains all the external definitions created for this VPlan. External
4188 /// definitions are VPValues that hold a pointer to their underlying IR.
4190
4191 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4192 /// VPlan is destroyed.
4193 SmallVector<VPBlockBase *> CreatedBlocks;
4194
4195 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4196 /// wrapping the original header of the scalar loop.
4197 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4198 : Entry(Entry), ScalarHeader(ScalarHeader) {
4199 Entry->setPlan(this);
4200 assert(ScalarHeader->getNumSuccessors() == 0 &&
4201 "scalar header must be a leaf node");
4202 }
4203
4204public:
4205 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4206 /// original preheader and scalar header of \p L, to be used as entry and
4207 /// scalar header blocks of the new VPlan.
4208 VPlan(Loop *L);
4209
4210 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4211 /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
4212 VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC) {
4213 setEntry(createVPBasicBlock("preheader"));
4214 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4215 TripCount = TC;
4216 }
4217
4219
4221 Entry = VPBB;
4222 VPBB->setPlan(this);
4223 }
4224
4225 /// Generate the IR code for this VPlan.
4226 void execute(VPTransformState *State);
4227
4228 /// Return the cost of this plan.
4230
4231 VPBasicBlock *getEntry() { return Entry; }
4232 const VPBasicBlock *getEntry() const { return Entry; }
4233
4234 /// Returns the preheader of the vector loop region, if one exists, or null
4235 /// otherwise.
4237 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4238 return VectorRegion
4239 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4240 : nullptr;
4241 }
4242
4243 /// Returns the VPRegionBlock of the vector loop.
4246
4247 /// Returns the 'middle' block of the plan, that is the block that selects
4248 /// whether to execute the scalar tail loop or the exit block from the loop
4249 /// latch. If there is an early exit from the vector loop, the middle block
4250 /// conceptually has the early exit block as third successor, split across 2
4251 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4252 /// tail loop or the exit block. If the scalar tail loop or exit block are
4253 /// known to always execute, the middle block may branch directly to that
4254 /// block. This function cannot be called once the vector loop region has been
4255 /// removed.
4257 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4258 assert(
4259 LoopRegion &&
4260 "cannot call the function after vector loop region has been removed");
4261 auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
4262 if (RegionSucc->getSingleSuccessor() ||
4263 is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
4264 return RegionSucc;
4265 // There is an early exit. The successor of RegionSucc is the middle block.
4266 return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
4267 }
4268
4270 return const_cast<VPlan *>(this)->getMiddleBlock();
4271 }
4272
4273 /// Return the VPBasicBlock for the preheader of the scalar loop.
4275 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4276 }
4277
4278 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4279 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4280
4281 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4282 /// the original scalar loop.
4283 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4284
4285 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4286 /// exit block.
4288
4289 /// Returns true if \p VPBB is an exit block.
4290 bool isExitBlock(VPBlockBase *VPBB);
4291
4292 /// The trip count of the original loop.
4294 assert(TripCount && "trip count needs to be set before accessing it");
4295 return TripCount;
4296 }
4297
4298 /// Set the trip count assuming it is currently null; if it is not - use
4299 /// resetTripCount().
4300 void setTripCount(VPValue *NewTripCount) {
4301 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4302 TripCount = NewTripCount;
4303 }
4304
4305 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4306 /// the original trip count have been replaced.
4307 void resetTripCount(VPValue *NewTripCount) {
4308 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4309 "TripCount must be set when resetting");
4310 TripCount = NewTripCount;
4311 }
4312
4313 /// The backedge taken count of the original loop.
4315 if (!BackedgeTakenCount)
4316 BackedgeTakenCount = new VPValue();
4317 return BackedgeTakenCount;
4318 }
4319 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4320
4321 /// The vector trip count.
4322 VPValue &getVectorTripCount() { return VectorTripCount; }
4323
4324 /// Returns the VF of the vector loop region.
4325 VPValue &getVF() { return VF; };
4326 const VPValue &getVF() const { return VF; };
4327
4328 /// Returns VF * UF of the vector loop region.
4329 VPValue &getVFxUF() { return VFxUF; }
4330
4333 }
4334
4335 void addVF(ElementCount VF) { VFs.insert(VF); }
4336
4338 assert(hasVF(VF) && "Cannot set VF not already in plan");
4339 VFs.clear();
4340 VFs.insert(VF);
4341 }
4342
4343 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4344 bool hasScalableVF() const {
4345 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4346 }
4347
4348 /// Returns an iterator range over all VFs of the plan.
4351 return VFs;
4352 }
4353
4354 bool hasScalarVFOnly() const {
4355 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4356 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4357 "Plan with scalar VF should only have a single VF");
4358 return HasScalarVFOnly;
4359 }
4360
4361 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4362
4363 unsigned getUF() const {
4364 assert(UFs.size() == 1 && "Expected a single UF");
4365 return UFs[0];
4366 }
4367
4368 void setUF(unsigned UF) {
4369 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4370 UFs.clear();
4371 UFs.insert(UF);
4372 }
4373
4374 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4375 /// concrete UF.
4376 bool isUnrolled() const { return UFs.size() == 1; }
4377
4378 /// Return a string with the name of the plan and the applicable VFs and UFs.
4379 std::string getName() const;
4380
4381 void setName(const Twine &newName) { Name = newName.str(); }
4382
4383 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4384 /// yet) for \p V.
4386 assert(V && "Trying to get or add the VPValue of a null Value");
4387 auto [It, Inserted] = Value2VPValue.try_emplace(V);
4388 if (Inserted) {
4389 VPValue *VPV = new VPValue(V);
4390 VPLiveIns.push_back(VPV);
4391 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4392 It->second = VPV;
4393 }
4394
4395 assert(It->second->isLiveIn() && "Only live-ins should be in mapping");
4396 return It->second;
4397 }
4398
4399 /// Return a VPValue wrapping i1 true.
4400 VPValue *getTrue() { return getConstantInt(1, 1); }
4401
4402 /// Return a VPValue wrapping i1 false.
4403 VPValue *getFalse() { return getConstantInt(1, 0); }
4404
4405 /// Return a VPValue wrapping a ConstantInt with the given type and value.
4406 VPValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4407 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4408 }
4409
4410 /// Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
4412 bool IsSigned = false) {
4413 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4414 }
4415
4416 /// Return a VPValue wrapping a ConstantInt with the given APInt value.
4418 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4419 }
4420
4421 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4422 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
4423
4424 /// Return the list of live-in VPValues available in the VPlan.
4426 assert(all_of(Value2VPValue,
4427 [this](const auto &P) {
4428 return is_contained(VPLiveIns, P.second);
4429 }) &&
4430 "all VPValues in Value2VPValue must also be in VPLiveIns");
4431 return VPLiveIns;
4432 }
4433
4434#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4435 /// Print the live-ins of this VPlan to \p O.
4436 void printLiveIns(raw_ostream &O) const;
4437
4438 /// Print this VPlan to \p O.
4439 void print(raw_ostream &O) const;
4440
4441 /// Print this VPlan in DOT format to \p O.
4442 void printDOT(raw_ostream &O) const;
4443
4444 /// Dump the plan to stderr (for debugging).
4445 LLVM_DUMP_METHOD void dump() const;
4446#endif
4447
4448 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4449 /// recipes to refer to the clones, and return it.
4450 VPlan *duplicate();
4451
4452 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4453 /// present. The returned block is owned by the VPlan and deleted once the
4454 /// VPlan is destroyed.
4456 VPRecipeBase *Recipe = nullptr) {
4457 auto *VPB = new VPBasicBlock(Name, Recipe);
4458 CreatedBlocks.push_back(VPB);
4459 return VPB;
4460 }
4461
4462 /// Create a new loop region with \p Name and entry and exiting blocks set
4463 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4464 /// owned by the VPlan and deleted once the VPlan is destroyed.
4465 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4466 VPBlockBase *Entry = nullptr,
4467 VPBlockBase *Exiting = nullptr) {
4468 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4469 : new VPRegionBlock(Name);
4470 CreatedBlocks.push_back(VPB);
4471 return VPB;
4472 }
4473
4474 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4475 /// returned block is owned by the VPlan and deleted once the VPlan is
4476 /// destroyed.
4478 const std::string &Name = "") {
4479 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4480 CreatedBlocks.push_back(VPB);
4481 return VPB;
4482 }
4483
4484 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4485 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4486 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4488
4489 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4490 /// instructions in \p IRBB, except its terminator which is managed by the
4491 /// successors of the block in VPlan. The returned block is owned by the VPlan
4492 /// and deleted once the VPlan is destroyed.
4494
4495 /// Returns true if the VPlan is based on a loop with an early exit. That is
4496 /// the case if the VPlan has either more than one exit block or a single exit
4497 /// block with multiple predecessors (one for the exit via the latch and one
4498 /// via the other early exit).
4499 bool hasEarlyExit() const {
4500 return count_if(ExitBlocks,
4501 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4502 1 ||
4503 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4504 }
4505
4506 /// Returns true if the scalar tail may execute after the vector loop. Note
4507 /// that this relies on unneeded branches to the scalar tail loop being
4508 /// removed.
4509 bool hasScalarTail() const {
4510 return !(!getScalarPreheader()->hasPredecessors() ||
4512 }
4513};
4514
4515#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4516inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4517 Plan.print(OS);
4518 return OS;
4519}
4520#endif
4521
4522} // end namespace llvm
4523
4524#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
This file defines the DenseMap class.
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define I(x, y, z)
Definition MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
const SmallVectorImpl< MachineOperand > & Cond
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:495
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:361
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:610
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:162
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:310
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
Definition ModRef.h:221
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
Definition ModRef.h:218
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3540
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3534
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3819
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:3847
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:3894
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:3849
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3846
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:3872
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:3830
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:3836
iterator end()
Definition VPlan.h:3856
iterator begin()
Recipe iterator methods.
Definition VPlan.h:3854
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:3848
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:3907
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:774
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:220
~VPBasicBlock() override
Definition VPlan.h:3840
const_reverse_iterator rbegin() const
Definition VPlan.h:3860
reverse_iterator rend()
Definition VPlan.h:3861
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3834
VPRecipeBase & back()
Definition VPlan.h:3869
const VPRecipeBase & front() const
Definition VPlan.h:3866
const_iterator begin() const
Definition VPlan.h:3855
VPRecipeBase & front()
Definition VPlan.h:3867
const VPRecipeBase & back() const
Definition VPlan.h:3868
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:3885
bool empty() const
Definition VPlan.h:3865
const_iterator end() const
Definition VPlan.h:3857
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:3880
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:3875
reverse_iterator rbegin()
Definition VPlan.h:3859
friend class VPlan
Definition VPlan.h:3820
size_t size() const
Definition VPlan.h:3864
const_reverse_iterator rend() const
Definition VPlan.h:3862
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2436
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2441
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2431
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2452
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2467
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2418
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2413
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2447
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2427
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:80
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:299
VPRegionBlock * getParent()
Definition VPlan.h:172
VPBlocksTy & getPredecessors()
Definition VPlan.h:204
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:201
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:369
void setName(const Twine &newName)
Definition VPlan.h:165
size_t getNumSuccessors() const
Definition VPlan.h:218
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:200
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:222
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:321
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:649
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:159
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:257
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the block's successor list.
Definition VPlan.h:334
size_t getNumPredecessors() const
Definition VPlan.h:219
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:290
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:212
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the block's predecessor list.
Definition VPlan.h:327
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:203
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:157
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:184
const VPRegionBlock * getParent() const
Definition VPlan.h:173
const std::string & getName() const
Definition VPlan.h:163
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:309
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:247
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:281
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:214
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:241
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:306
friend class VPBlockUtils
Definition VPlan.h:81
unsigned getVPBlockID() const
Definition VPlan.h:170
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:348
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:313
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:149
VPBlocksTy & getSuccessors()
Definition VPlan.h:198
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:204
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:170
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:270
void setParent(VPRegionBlock *P)
Definition VPlan.h:183
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:263
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:208
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:197
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:2972
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2956
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:2980
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:2953
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3475
~VPCanonicalIVPHIRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3507
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3482
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3514
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition VPlan.h:3477
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3502
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3490
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3521
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:302
friend class VPValue
Definition VPlanValue.h:303
VPDef(const unsigned char SC)
Definition VPlanValue.h:382
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3679
VPValue * getStepValue() const
Definition VPlan.h:3696
Type * getScalarType() const
Definition VPlan.h:3691
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3667
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3659
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3699
VPValue * getStartValue() const
Definition VPlan.h:3695
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3651
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3589
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3570
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3576
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3582
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3565
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3451
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3456
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3442
const SCEV * getSCEV() const
Definition VPlan.h:3468
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3447
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3107
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3089
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3071
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3059
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3045
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3037
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3041
unsigned getVFScaleFactor() const
Definition VPlan.h:3101
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3039
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1989
static bool classof(const VPValue *V)
Definition VPlan.h:1999
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2030
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2035
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2019
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2027
VPValue * getStartValue() const
Definition VPlan.h:2022
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:1995
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2039
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1984
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1700
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1717
unsigned getOpcode() const
Definition VPlan.h:1713
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1694
~VPHistogramRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:3972
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:450
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:3996
static bool classof(const VPBlockBase *V)
Definition VPlan.h:3986
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:3973
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:475
Class to record and manage LLVM IR flags.
Definition VPlan.h:596
FastMathFlagsTy FMFs
Definition VPlan.h:660
bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:709
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:701
WrapFlagsTy WrapFlags
Definition VPlan.h:654
CmpInst::Predicate CmpPredicate
Definition VPlan.h:653
void printFlags(raw_ostream &O) const
GEPNoWrapFlags GEPFlags
Definition VPlan.h:658
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:818
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition VPlan.h:655
CmpInst::Predicate getPredicate() const
Definition VPlan.h:800
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:823
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:718
ExactFlagsTy ExactFlags
Definition VPlan.h:657
bool hasNoSignedWrap() const
Definition VPlan.h:842
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:853
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:704
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:707
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:712
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:698
bool isNonNeg() const
Definition VPlan.h:825
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:812
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:815
DisjointFlagsTy DisjointFlags
Definition VPlan.h:656
unsigned AllFlags
Definition VPlan.h:661
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:806
bool hasNoUnsignedWrap() const
Definition VPlan.h:831
NonNegFlagsTy NonNegFlags
Definition VPlan.h:659
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:728
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:763
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:715
VPIRFlags(Instruction &I)
Definition VPlan.h:667
Instruction & getInstruction() const
Definition VPlan.h:1382
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1396
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void extractLastLaneOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the operand using Builder.
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1369
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1402
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1390
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1357
Helper to manage IR metadata for recipes.
Definition VPlan.h:938
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:946
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void addMetadata(unsigned Kind, MDNode *Node)
Add metadata with kind Kind and Node.
Definition VPlan.h:961
void applyMetadata(Instruction &I) const
Add all metadata to I.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1238
static bool classof(const VPUser *R)
Definition VPlan.h:1223
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1205
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, const VPIRMetadata &Metadata, const Twine &Name="")
Definition VPlan.h:1199
Type * getResultType() const
Definition VPlan.h:1244
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1193
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1227
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:976
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1101
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1112
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1060
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1014
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1050
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1063
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1011
@ FirstOrderRecurrenceSplice
Definition VPlan.h:982
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1054
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1006
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1003
@ VScale
Returns the value for vscale.
Definition VPlan.h:1065
@ CanonicalIVIncrementForPart
Definition VPlan.h:996
@ CalculateTripCountMinusVF
Definition VPlan.h:994
bool hasResult() const
Definition VPlan.h:1140
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1180
unsigned getOpcode() const
Definition VPlan.h:1120
friend class VPlanSlp
Definition VPlan.h:977
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2546
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2552
static bool classof(const VPUser *U)
Definition VPlan.h:2528
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2495
Instruction * getInsertPos() const
Definition VPlan.h:2550
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2523
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2548
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2540
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2569
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2534
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2655
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2643
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2662
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2636
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2623
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2580
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2613
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2590
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2607
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2582
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0, VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
Definition VPlan.h:2775
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1, VPValue *Cond, unsigned ScaleFactor, Instruction *ReductionInst=nullptr)
Definition VPlan.h:2779
~VPPartialReductionRecipe() override=default
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by.
Definition VPlan.h:2816
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition VPlan.h:2813
VPPartialReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2797
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1255
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1277
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1272
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:3963
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1297
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1264
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1282
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1286
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3169
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3145
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3156
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3141
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:386
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:473
VPRegionBlock * getRegion()
Definition VPlan.h:4124
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:484
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:407
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:478
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:453
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:388
const VPBasicBlock * getParent() const
Definition VPlan.h:408
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:458
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:397
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2858
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2831
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2861
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2842
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2392
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2361
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by.
Definition VPlan.h:2375
~VPReductionPHIRecipe() override=default
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false, unsigned VFScaleFactor=1)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2351
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2386
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition VPlan.h:2395
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2398
void execute(VPTransformState &State) override
Generate the phi/select nodes.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2389
A recipe to represent in-loop reduction operations, performing a reduction on a vector operand into a ...
Definition VPlan.h:2670
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2752
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2714
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2699
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2730
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2756
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2758
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2748
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2750
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2754
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2692
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2708
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered, DebugLoc DL)
Definition VPlan.h:2678
static bool classof(const VPUser *U)
Definition VPlan.h:2720
static bool classof(const VPValue *VPV)
Definition VPlan.h:2725
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4007
const VPBlockBase * getEntry() const
Definition VPlan.h:4043
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4118
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4075
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4060
VPBlockBase * getExiting()
Definition VPlan.h:4056
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4105
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4048
const Type * getCanonicalIVType() const
Definition VPlan.h:4119
const VPBlockBase * getExiting() const
Definition VPlan.h:4055
VPBlockBase * getEntry()
Definition VPlan.h:4044
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4113
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4068
friend class VPlan
Definition VPlan.h:4008
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4039
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2873
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, VPIRMetadata Metadata={})
Definition VPlan.h:2881
bool isSingleScalar() const
Definition VPlan.h:2918
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:2930
bool isPredicated() const
Definition VPlan.h:2920
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2893
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2923
unsigned getOpcode() const
Definition VPlan.h:2947
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:2942
VPValue * getStepValue() const
Definition VPlan.h:3761
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3749
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3720
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition VPlan.h:3741
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3732
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3713
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3764
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:517
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:523
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:582
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:527
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:585
static bool classof(const VPUser *U)
Definition VPlan.h:574
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:519
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:926
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:199
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1425
operand_range operands()
Definition VPlanValue.h:267
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:243
unsigned getNumOperands() const
Definition VPlanValue.h:237
operand_iterator op_end()
Definition VPlanValue.h:265
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:238
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:218
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:261
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:260
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:48
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:135
friend class VPExpressionRecipe
Definition VPlanValue.h:53
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition VPlanValue.h:176
friend class VPDef
Definition VPlanValue.h:49
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:85
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:98
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:186
unsigned getNumUsers() const
Definition VPlanValue.h:113
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition VPlanValue.h:171
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1869
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1890
const VPValue * getVFValue() const
Definition VPlan.h:1865
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1883
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1876
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1854
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isFirstPart() const
Return true if this VPVectorPointerRecipe corresponds to part 0.
Definition VPlan.h:1942
Type * getSourceElementType() const
Definition VPlan.h:1919
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1921
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1928
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1909
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:1945
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1935
const_operand_range args() const
Definition VPlan.h:1675
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1656
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1643
operand_range args()
Definition VPlan.h:1674
Function * getCalledScalarFunction() const
Definition VPlan.h:1670
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3624
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3611
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3606
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1489
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition VPlan.h:1497
Instruction::CastOps getOpcode() const
Definition VPlan.h:1540
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1505
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1543
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1517
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1811
Type * getSourceElementType() const
Definition VPlan.h:1816
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands)
Definition VPlan.h:1792
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1819
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1803
~VPWidenGEPRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1832
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2109
static bool classof(const VPValue *V)
Definition VPlan.h:2063
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2079
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2094
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2087
PHINode * getPHINode() const
Definition VPlan.h:2089
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2051
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2075
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2092
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2101
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2058
static bool classof(const VPHeaderPHIRecipe *R)
Definition VPlan.h:2068
const VPValue * getVFValue() const
Definition VPlan.h:2082
const VPValue * getStepValue() const
Definition VPlan.h:2076
const TruncInst * getTruncInst() const
Definition VPlan.h:2187
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2162
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, DebugLoc DL)
Definition VPlan.h:2138
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2154
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2186
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2129
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2203
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2182
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2195
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1573
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1608
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1617
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1564
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1623
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1590
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1620
~VPWidenIntrinsicRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1611
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3193
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3190
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3233
static bool classof(const VPUser *U)
Definition VPlan.h:3227
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3256
Instruction & Ingredient
Definition VPlan.h:3181
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3216
Instruction & getIngredient() const
Definition VPlan.h:3264
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3187
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3220
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3247
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3184
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3243
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3203
void setMask(VPValue *Mask)
Definition VPlan.h:3195
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3253
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3240
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3237
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2263
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2268
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2275
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2227
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2237
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2215
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1446
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1462
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands)
Definition VPlan.h:1456
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:1450
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1479
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4137
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1111
friend class VPSlotTracker
Definition VPlan.h:4139
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1087
bool hasVF(ElementCount VF) const
Definition VPlan.h:4343
LLVMContext & getContext() const
Definition VPlan.h:4331
VPBasicBlock * getEntry()
Definition VPlan.h:4231
VPValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4322
void setName(const Twine &newName)
Definition VPlan.h:4381
bool hasScalableVF() const
Definition VPlan.h:4344
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4329
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4325
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4293
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition VPlan.h:4400
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4314
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4350
VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4212
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:895
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:872
const VPValue & getVF() const
Definition VPlan.h:4326
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:903
const VPBasicBlock * getEntry() const
Definition VPlan.h:4232
friend class VPlanPrinter
Definition VPlan.h:4138
VPValue * getConstantInt(const APInt &Val)
Return a VPValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4417
unsigned getUF() const
Definition VPlan.h:4363
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4477
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1225
bool hasUF(unsigned UF) const
Definition VPlan.h:4361
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4283
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4406
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4319
void setVF(ElementCount VF)
Definition VPlan.h:4337
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4376
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1016
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4499
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:998
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4269
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4300
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4307
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4256
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4220
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4455
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1231
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition VPlan.h:4403
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4385
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4465
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1117
bool hasScalarVFOnly() const
Definition VPlan.h:4354
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4274
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:910
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4425
void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1070
void addVF(ElementCount VF)
Definition VPlan.h:4335
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4279
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4422
VPValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4411
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1032
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4236
void setUF(unsigned UF)
Definition VPlan.h:4368
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4509
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1158
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
Increasing range of size_t indices.
Definition STLExtras.h:2425
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:296
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:1954
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
DenseMap< Value *, VPValue * > Value2VPValueTy
Definition VPlanValue.h:192
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:76
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing VPPhiAccessors.
Definition VPlan.h:3782
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3803
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3784
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3787
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3774
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2306
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition VPlan.h:2301
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2324
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:626
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:631
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:621
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:614
PHINode & getIRPhi()
Definition VPlan.h:1427
VPIRPhi(PHINode &PN)
Definition VPlan.h:1420
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1422
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1438
static bool classof(const VPUser *U)
Definition VPlan.h:1315
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1330
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1345
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1312
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1325
static bool classof(const VPValue *V)
Definition VPlan.h:1320
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:871
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:885
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, Instruction &I)
Definition VPlan.h:876
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:880
static bool classof(const VPValue *V)
Definition VPlan.h:905
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:912
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe, thereby "executing" the VPlan.
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:900
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:872
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the output IR.
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3323
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3311
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3339
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3270
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3298
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3271
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3280
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1739
VPWidenSelectRecipe(SelectInst &I, ArrayRef< VPValue * > Operands)
Definition VPlan.h:1733
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1766
VPValue * getCond() const
Definition VPlan.h:1761
unsigned getOpcode() const
Definition VPlan.h:1759
~VPWidenSelectRecipe() override=default
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3404
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3423
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3393
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3407
A recipe for widening store operations, using the stored value, the address to store to and an optional mask.
Definition VPlan.h:3350
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3368
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3359
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3380
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3351