LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class Type;
61class VPBasicBlock;
62class VPBuilder;
63class VPDominatorTree;
64class VPRegionBlock;
65class VPlan;
66class VPLane;
68class Value;
70
71struct VPCostContext;
72
73using VPlanPtr = std::unique_ptr<VPlan>;
74
75/// \enum UncountableExitStyle
76/// Different methods of handling early exits.
77///
80 /// No side effects to worry about, so we can process any uncountable exits
81 /// in the loop and branch either to the middle block if the trip count was
 82 /// reached, or an early exit block to determine which exit was taken.
84 /// All memory operations other than the load(s) required to determine whether
 85 /// an uncountable exit occurs will be masked based on that condition. If an
86 /// uncountable exit is taken, then all lanes before the exiting lane will
87 /// complete, leaving just the final lane to execute in the scalar tail.
89};
90
91/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
92/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
94 friend class VPBlockUtils;
95
96 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
97
98 /// An optional name for the block.
99 std::string Name;
100
101 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
102 /// it is a topmost VPBlockBase.
103 VPRegionBlock *Parent = nullptr;
104
105 /// List of predecessor blocks.
107
108 /// List of successor blocks.
110
111 /// VPlan containing the block. Can only be set on the entry block of the
112 /// plan.
113 VPlan *Plan = nullptr;
114
115 /// Add \p Successor as the last successor to this block.
116 void appendSuccessor(VPBlockBase *Successor) {
117 assert(Successor && "Cannot add nullptr successor!");
118 Successors.push_back(Successor);
119 }
120
121 /// Add \p Predecessor as the last predecessor to this block.
122 void appendPredecessor(VPBlockBase *Predecessor) {
123 assert(Predecessor && "Cannot add nullptr predecessor!");
124 Predecessors.push_back(Predecessor);
125 }
126
127 /// Remove \p Predecessor from the predecessors of this block.
128 void removePredecessor(VPBlockBase *Predecessor) {
129 auto Pos = find(Predecessors, Predecessor);
130 assert(Pos && "Predecessor does not exist");
131 Predecessors.erase(Pos);
132 }
133
134 /// Remove \p Successor from the successors of this block.
135 void removeSuccessor(VPBlockBase *Successor) {
136 auto Pos = find(Successors, Successor);
137 assert(Pos && "Successor does not exist");
138 Successors.erase(Pos);
139 }
140
141 /// This function replaces one predecessor with another, useful when
142 /// trying to replace an old block in the CFG with a new one.
143 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
144 auto I = find(Predecessors, Old);
145 assert(I != Predecessors.end());
146 assert(Old->getParent() == New->getParent() &&
147 "replaced predecessor must have the same parent");
148 *I = New;
149 }
150
151 /// This function replaces one successor with another, useful when
152 /// trying to replace an old block in the CFG with a new one.
153 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
154 auto I = find(Successors, Old);
155 assert(I != Successors.end());
156 assert(Old->getParent() == New->getParent() &&
157 "replaced successor must have the same parent");
158 *I = New;
159 }
160
161protected:
162 VPBlockBase(const unsigned char SC, const std::string &N)
163 : SubclassID(SC), Name(N) {}
164
165public:
166 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
167 /// that are actually instantiated. Values of this enumeration are kept in the
168 /// SubclassID field of the VPBlockBase objects. They are used for concrete
169 /// type identification.
170 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
171
173
174 virtual ~VPBlockBase() = default;
175
176 const std::string &getName() const { return Name; }
177
178 void setName(const Twine &newName) { Name = newName.str(); }
179
180 /// \return an ID for the concrete type of this object.
181 /// This is used to implement the classof checks. This should not be used
182 /// for any other purpose, as the values may change as LLVM evolves.
183 unsigned getVPBlockID() const { return SubclassID; }
184
185 VPRegionBlock *getParent() { return Parent; }
186 const VPRegionBlock *getParent() const { return Parent; }
187
188 /// \return A pointer to the plan containing the current block.
189 VPlan *getPlan();
190 const VPlan *getPlan() const;
191
192 /// Sets the pointer of the plan containing the block. The block must be the
193 /// entry block into the VPlan.
194 void setPlan(VPlan *ParentPlan);
195
196 void setParent(VPRegionBlock *P) { Parent = P; }
197
198 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
199 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
200 /// VPBlockBase is a VPBasicBlock, it is returned.
201 const VPBasicBlock *getEntryBasicBlock() const;
202 VPBasicBlock *getEntryBasicBlock();
203
204 /// \return the VPBasicBlock that is the exiting this VPBlockBase,
205 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
206 /// VPBlockBase is a VPBasicBlock, it is returned.
207 const VPBasicBlock *getExitingBasicBlock() const;
208 VPBasicBlock *getExitingBasicBlock();
209
210 const VPBlocksTy &getSuccessors() const { return Successors; }
211 VPBlocksTy &getSuccessors() { return Successors; }
212
213 /// Returns true if this block has any successors.
214 bool hasSuccessors() const { return !Successors.empty(); }
215 /// Returns true if this block has any predecessors.
216 bool hasPredecessors() const { return !Predecessors.empty(); }
217
220
221 const VPBlocksTy &getPredecessors() const { return Predecessors; }
222 VPBlocksTy &getPredecessors() { return Predecessors; }
223
224 /// \return the successor of this VPBlockBase if it has a single successor.
225 /// Otherwise return a null pointer.
227 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
228 }
229
230 /// \return the predecessor of this VPBlockBase if it has a single
231 /// predecessor. Otherwise return a null pointer.
233 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
234 }
235
236 size_t getNumSuccessors() const { return Successors.size(); }
237 size_t getNumPredecessors() const { return Predecessors.size(); }
238
239 /// An Enclosing Block of a block B is any block containing B, including B
240 /// itself. \return the closest enclosing block starting from "this", which
241 /// has successors. \return the root enclosing block if all enclosing blocks
242 /// have no successors.
243 VPBlockBase *getEnclosingBlockWithSuccessors();
244
245 /// \return the closest enclosing block starting from "this", which has
246 /// predecessors. \return the root enclosing block if all enclosing blocks
247 /// have no predecessors.
248 VPBlockBase *getEnclosingBlockWithPredecessors();
249
250 /// \return the successors either attached directly to this VPBlockBase or, if
251 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
252 /// successors of its own, search recursively for the first enclosing
253 /// VPRegionBlock that has successors and return them. If no such
254 /// VPRegionBlock exists, return the (empty) successors of the topmost
255 /// VPBlockBase reached.
257 return getEnclosingBlockWithSuccessors()->getSuccessors();
258 }
259
260 /// \return the hierarchical successor of this VPBlockBase if it has a single
261 /// hierarchical successor. Otherwise return a null pointer.
263 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
264 }
265
266 /// \return the predecessors either attached directly to this VPBlockBase or,
267 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
268 /// predecessors of its own, search recursively for the first enclosing
269 /// VPRegionBlock that has predecessors and return them. If no such
270 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
271 /// VPBlockBase reached.
273 return getEnclosingBlockWithPredecessors()->getPredecessors();
274 }
275
276 /// \return the hierarchical predecessor of this VPBlockBase if it has a
277 /// single hierarchical predecessor. Otherwise return a null pointer.
281
282 /// Set a given VPBlockBase \p Successor as the single successor of this
283 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
284 /// This VPBlockBase must have no successors.
286 assert(Successors.empty() && "Setting one successor when others exist.");
287 assert(Successor->getParent() == getParent() &&
288 "connected blocks must have the same parent");
289 appendSuccessor(Successor);
290 }
291
292 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
293 /// successors of this VPBlockBase. This VPBlockBase is not added as
294 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
295 /// successors.
296 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
297 assert(Successors.empty() && "Setting two successors when others exist.");
298 appendSuccessor(IfTrue);
299 appendSuccessor(IfFalse);
300 }
301
302 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
303 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
304 /// as successor of any VPBasicBlock in \p NewPreds.
306 assert(Predecessors.empty() && "Block predecessors already set.");
307 for (auto *Pred : NewPreds)
308 appendPredecessor(Pred);
309 }
310
311 /// Set each VPBasicBlock in \p NewSuccss as successor of this VPBlockBase.
312 /// This VPBlockBase must have no successors. This VPBlockBase is not added
313 /// as predecessor of any VPBasicBlock in \p NewSuccs.
315 assert(Successors.empty() && "Block successors already set.");
316 for (auto *Succ : NewSuccs)
317 appendSuccessor(Succ);
318 }
319
320 /// Remove all the predecessor of this block.
321 void clearPredecessors() { Predecessors.clear(); }
322
323 /// Remove all the successors of this block.
324 void clearSuccessors() { Successors.clear(); }
325
326 /// Swap predecessors of the block. The block must have exactly 2
327 /// predecessors.
329 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
330 std::swap(Predecessors[0], Predecessors[1]);
331 }
332
333 /// Swap successors of the block. The block must have exactly 2 successors.
334 // TODO: This should be part of introducing conditional branch recipes rather
335 // than being independent.
337 assert(Successors.size() == 2 && "must have 2 successors to swap");
338 std::swap(Successors[0], Successors[1]);
339 }
340
341 /// Returns the index for \p Pred in the blocks predecessors list.
342 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
343 assert(count(Predecessors, Pred) == 1 &&
344 "must have Pred exactly once in Predecessors");
345 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
346 }
347
348 /// Returns the index for \p Succ in the blocks successor list.
349 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
350 assert(count(Successors, Succ) == 1 &&
351 "must have Succ exactly once in Successors");
352 return std::distance(Successors.begin(), find(Successors, Succ));
353 }
354
355 /// The method which generates the output IR that correspond to this
356 /// VPBlockBase, thereby "executing" the VPlan.
357 virtual void execute(VPTransformState *State) = 0;
358
359 /// Return the cost of the block.
361
362#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
363 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
364 OS << getName();
365 }
366
367 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
368 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
 369 /// consecutive numbers.
370 ///
371 /// Note that the numbering is applied to the whole VPlan, so printing
372 /// individual blocks is consistent with the whole VPlan printing.
373 virtual void print(raw_ostream &O, const Twine &Indent,
374 VPSlotTracker &SlotTracker) const = 0;
375
376 /// Print plain-text dump of this VPlan to \p O.
377 void print(raw_ostream &O) const;
378
379 /// Print the successors of this block to \p O, prefixing all lines with \p
380 /// Indent.
381 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
382
383 /// Dump this VPBlockBase to dbgs().
384 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
385#endif
386
 387 /// Clone the current block and its recipes without updating the operands of
388 /// the cloned recipes, including all blocks in the single-entry single-exit
389 /// region for VPRegionBlocks.
390 virtual VPBlockBase *clone() = 0;
391};
392
393/// VPRecipeBase is a base class modeling a sequence of one or more output IR
394/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
395/// and is responsible for deleting its defined values. Single-value
396/// recipes must inherit from VPSingleDef instead of inheriting from both
397/// VPRecipeBase and VPValue separately.
399 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
400 public VPDef,
401 public VPUser {
402 friend VPBasicBlock;
403 friend class VPBlockUtils;
404
405 /// Subclass identifier (for isa/dyn_cast).
406 const unsigned char SubclassID;
407
408 /// Each VPRecipe belongs to a single VPBasicBlock.
409 VPBasicBlock *Parent = nullptr;
410
411 /// The debug location for the recipe.
412 DebugLoc DL;
413
414public:
415 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
416 /// that is actually instantiated. Values of this enumeration are kept in the
417 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
418 /// type identification.
419 using VPRecipeTy = enum {
420 VPBranchOnMaskSC,
421 VPDerivedIVSC,
422 VPExpandSCEVSC,
423 VPExpressionSC,
424 VPIRInstructionSC,
425 VPInstructionSC,
426 VPInterleaveEVLSC,
427 VPInterleaveSC,
428 VPReductionEVLSC,
429 VPReductionSC,
430 VPReplicateSC,
431 VPScalarIVStepsSC,
432 VPVectorPointerSC,
433 VPVectorEndPointerSC,
434 VPWidenCallSC,
435 VPWidenCanonicalIVSC,
436 VPWidenCastSC,
437 VPWidenGEPSC,
438 VPWidenIntrinsicSC,
439 VPWidenLoadEVLSC,
440 VPWidenLoadSC,
441 VPWidenStoreEVLSC,
442 VPWidenStoreSC,
443 VPWidenSC,
444 VPBlendSC,
445 VPHistogramSC,
446 // START: Phi-like recipes. Need to be kept together.
447 VPWidenPHISC,
448 VPPredInstPHISC,
449 // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
450 // VPHeaderPHIRecipe need to be kept together.
451 VPCurrentIterationPHISC,
452 VPActiveLaneMaskPHISC,
453 VPFirstOrderRecurrencePHISC,
454 VPWidenIntOrFpInductionSC,
455 VPWidenPointerInductionSC,
456 VPReductionPHISC,
457 // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
458 // END: Phi-like recipes
459 VPFirstPHISC = VPWidenPHISC,
460 VPFirstHeaderPHISC = VPCurrentIterationPHISC,
461 VPLastHeaderPHISC = VPReductionPHISC,
462 VPLastPHISC = VPReductionPHISC,
463 };
464
465 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
467 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
468
469 ~VPRecipeBase() override = default;
470
471 /// Clone the current recipe.
472 virtual VPRecipeBase *clone() = 0;
473
474 /// \return the VPBasicBlock which this VPRecipe belongs to.
475 VPBasicBlock *getParent() { return Parent; }
476 const VPBasicBlock *getParent() const { return Parent; }
477
478 /// \return the VPRegionBlock which the recipe belongs to.
479 VPRegionBlock *getRegion();
480 const VPRegionBlock *getRegion() const;
481
482 /// The method which generates the output IR instructions that correspond to
483 /// this VPRecipe, thereby "executing" the VPlan.
484 virtual void execute(VPTransformState &State) = 0;
485
486 /// Return the cost of this recipe, taking into account if the cost
487 /// computation should be skipped and the ForceTargetInstructionCost flag.
488 /// Also takes care of printing the cost for debugging.
490
491 /// Insert an unlinked recipe into a basic block immediately before
492 /// the specified recipe.
493 void insertBefore(VPRecipeBase *InsertPos);
494 /// Insert an unlinked recipe into \p BB immediately before the insertion
495 /// point \p IP;
496 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
497
498 /// Insert an unlinked Recipe into a basic block immediately after
499 /// the specified Recipe.
500 void insertAfter(VPRecipeBase *InsertPos);
501
502 /// Unlink this recipe from its current VPBasicBlock and insert it into
503 /// the VPBasicBlock that MovePos lives in, right after MovePos.
504 void moveAfter(VPRecipeBase *MovePos);
505
506 /// Unlink this recipe and insert into BB before I.
507 ///
508 /// \pre I is a valid iterator into BB.
509 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
510
511 /// This method unlinks 'this' from the containing basic block, but does not
512 /// delete it.
513 void removeFromParent();
514
515 /// This method unlinks 'this' from the containing basic block and deletes it.
516 ///
517 /// \returns an iterator pointing to the element after the erased one
519
520 /// \return an ID for the concrete type of this object.
521 unsigned getVPRecipeID() const { return SubclassID; }
522
523 /// Method to support type inquiry through isa, cast, and dyn_cast.
524 static inline bool classof(const VPDef *D) {
525 // All VPDefs are also VPRecipeBases.
526 return true;
527 }
528
529 static inline bool classof(const VPUser *U) { return true; }
530
531 /// Returns true if the recipe may have side-effects.
532 bool mayHaveSideEffects() const;
533
534 /// Return true if we can safely execute this recipe unconditionally even if
535 /// it is masked originally.
536 bool isSafeToSpeculativelyExecute() const;
537
538 /// Returns true for PHI-like recipes.
539 bool isPhi() const;
540
541 /// Returns true if the recipe may read from memory.
542 bool mayReadFromMemory() const;
543
544 /// Returns true if the recipe may write to memory.
545 bool mayWriteToMemory() const;
546
547 /// Returns true if the recipe may read from or write to memory.
548 bool mayReadOrWriteMemory() const {
550 }
551
552 /// Returns the debug location of the recipe.
553 DebugLoc getDebugLoc() const { return DL; }
554
555 /// Return true if the recipe is a scalar cast.
556 bool isScalarCast() const;
557
558 /// Set the recipe's debug location to \p NewDL.
559 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
560
561#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
562 /// Dump the recipe to stderr (for debugging).
563 LLVM_ABI_FOR_TEST void dump() const;
564
565 /// Print the recipe, delegating to printRecipe().
566 void print(raw_ostream &O, const Twine &Indent,
568#endif
569
570protected:
571 /// Compute the cost of this recipe either using a recipe's specialized
572 /// implementation or using the legacy cost model and the underlying
573 /// instructions.
574 virtual InstructionCost computeCost(ElementCount VF,
575 VPCostContext &Ctx) const;
576
577#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
578 /// Each concrete VPRecipe prints itself, without printing common information,
579 /// like debug info or metadata.
580 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
581 VPSlotTracker &SlotTracker) const = 0;
582#endif
583};
584
585// Helper macro to define common classof implementations for recipes.
586#define VP_CLASSOF_IMPL(VPRecipeID) \
587 static inline bool classof(const VPRecipeBase *R) { \
588 return R->getVPRecipeID() == VPRecipeID; \
589 } \
590 static inline bool classof(const VPValue *V) { \
591 auto *R = V->getDefiningRecipe(); \
592 return R && R->getVPRecipeID() == VPRecipeID; \
593 } \
594 static inline bool classof(const VPUser *U) { \
595 auto *R = dyn_cast<VPRecipeBase>(U); \
596 return R && R->getVPRecipeID() == VPRecipeID; \
597 } \
598 static inline bool classof(const VPSingleDefRecipe *R) { \
599 return R->getVPRecipeID() == VPRecipeID; \
600 }
601
602/// VPSingleDef is a base class for recipes for modeling a sequence of one or
603/// more output IR that define a single result VPValue.
604/// Note that VPRecipeBase must be inherited from before VPValue.
606public:
607 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
609 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
610
611 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
613 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
614
615 static inline bool classof(const VPRecipeBase *R) {
616 switch (R->getVPRecipeID()) {
617 case VPRecipeBase::VPDerivedIVSC:
618 case VPRecipeBase::VPExpandSCEVSC:
619 case VPRecipeBase::VPExpressionSC:
620 case VPRecipeBase::VPInstructionSC:
621 case VPRecipeBase::VPReductionEVLSC:
622 case VPRecipeBase::VPReductionSC:
623 case VPRecipeBase::VPReplicateSC:
624 case VPRecipeBase::VPScalarIVStepsSC:
625 case VPRecipeBase::VPVectorPointerSC:
626 case VPRecipeBase::VPVectorEndPointerSC:
627 case VPRecipeBase::VPWidenCallSC:
628 case VPRecipeBase::VPWidenCanonicalIVSC:
629 case VPRecipeBase::VPWidenCastSC:
630 case VPRecipeBase::VPWidenGEPSC:
631 case VPRecipeBase::VPWidenIntrinsicSC:
632 case VPRecipeBase::VPWidenSC:
633 case VPRecipeBase::VPBlendSC:
634 case VPRecipeBase::VPPredInstPHISC:
635 case VPRecipeBase::VPCurrentIterationPHISC:
636 case VPRecipeBase::VPActiveLaneMaskPHISC:
637 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
638 case VPRecipeBase::VPWidenPHISC:
639 case VPRecipeBase::VPWidenIntOrFpInductionSC:
640 case VPRecipeBase::VPWidenPointerInductionSC:
641 case VPRecipeBase::VPReductionPHISC:
642 case VPRecipeBase::VPWidenLoadEVLSC:
643 case VPRecipeBase::VPWidenLoadSC:
644 return true;
645 case VPRecipeBase::VPBranchOnMaskSC:
646 case VPRecipeBase::VPInterleaveEVLSC:
647 case VPRecipeBase::VPInterleaveSC:
648 case VPRecipeBase::VPIRInstructionSC:
649 case VPRecipeBase::VPWidenStoreEVLSC:
650 case VPRecipeBase::VPWidenStoreSC:
651 case VPRecipeBase::VPHistogramSC:
652 return false;
653 }
654 llvm_unreachable("Unhandled VPRecipeID");
655 }
656
657 static inline bool classof(const VPValue *V) {
658 auto *R = V->getDefiningRecipe();
659 return R && classof(R);
660 }
661
662 static inline bool classof(const VPUser *U) {
663 auto *R = dyn_cast<VPRecipeBase>(U);
664 return R && classof(R);
665 }
666
667 VPSingleDefRecipe *clone() override = 0;
668
669 /// Returns the underlying instruction.
676
677#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
678 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
680#endif
681};
682
683/// Class to record and manage LLVM IR flags.
686 enum class OperationType : unsigned char {
687 Cmp,
688 FCmp,
689 OverflowingBinOp,
690 Trunc,
691 DisjointOp,
692 PossiblyExactOp,
693 GEPOp,
694 FPMathOp,
695 NonNegOp,
696 ReductionOp,
697 Other
698 };
699
700public:
701 struct WrapFlagsTy {
702 char HasNUW : 1;
703 char HasNSW : 1;
704
706 };
707
709 char HasNUW : 1;
710 char HasNSW : 1;
711
713 };
714
719
721 char NonNeg : 1;
722 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
723 };
724
725private:
726 struct ExactFlagsTy {
727 char IsExact : 1;
728 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
729 };
730 struct FastMathFlagsTy {
731 char AllowReassoc : 1;
732 char NoNaNs : 1;
733 char NoInfs : 1;
734 char NoSignedZeros : 1;
735 char AllowReciprocal : 1;
736 char AllowContract : 1;
737 char ApproxFunc : 1;
738
739 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
740 };
741 /// Holds both the predicate and fast-math flags for floating-point
742 /// comparisons.
743 struct FCmpFlagsTy {
744 uint8_t CmpPredStorage;
745 FastMathFlagsTy FMFs;
746 };
747 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
748 struct ReductionFlagsTy {
749 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
750 // additional kinds.
751 unsigned char Kind : 6;
752 // TODO: Derive order/in-loop from plan and remove here.
753 unsigned char IsOrdered : 1;
754 unsigned char IsInLoop : 1;
755 FastMathFlagsTy FMFs;
756
757 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
758 FastMathFlags FMFs)
759 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
760 IsInLoop(IsInLoop), FMFs(FMFs) {}
761 };
762
763 OperationType OpType;
764
765 union {
770 ExactFlagsTy ExactFlags;
773 FastMathFlagsTy FMFs;
774 FCmpFlagsTy FCmpFlags;
775 ReductionFlagsTy ReductionFlags;
777 };
778
779public:
780 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
781
783 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
784 OpType = OperationType::FCmp;
786 FCmp->getPredicate());
787 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
788 FCmpFlags.FMFs = FCmp->getFastMathFlags();
789 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
790 OpType = OperationType::Cmp;
792 Op->getPredicate());
793 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
794 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
795 OpType = OperationType::DisjointOp;
796 DisjointFlags.IsDisjoint = Op->isDisjoint();
797 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
798 OpType = OperationType::OverflowingBinOp;
799 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
800 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
801 OpType = OperationType::Trunc;
802 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
803 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
804 OpType = OperationType::PossiblyExactOp;
805 ExactFlags.IsExact = Op->isExact();
806 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
807 OpType = OperationType::GEPOp;
808 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
809 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
810 "wrap flags truncated");
811 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
812 OpType = OperationType::NonNegOp;
813 NonNegFlags.NonNeg = PNNI->hasNonNeg();
814 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
815 OpType = OperationType::FPMathOp;
816 FMFs = Op->getFastMathFlags();
817 }
818 }
819
820 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
822 assert(getPredicate() == Pred && "predicate truncated");
823 }
824
826 : OpType(OperationType::FCmp), AllFlags() {
828 assert(getPredicate() == Pred && "predicate truncated");
829 FCmpFlags.FMFs = FMFs;
830 }
831
833 : OpType(OperationType::OverflowingBinOp), AllFlags() {
834 this->WrapFlags = WrapFlags;
835 }
836
838 : OpType(OperationType::Trunc), AllFlags() {
839 this->TruncFlags = TruncFlags;
840 }
841
842 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
843 this->FMFs = FMFs;
844 }
845
847 : OpType(OperationType::DisjointOp), AllFlags() {
848 this->DisjointFlags = DisjointFlags;
849 }
850
852 : OpType(OperationType::NonNegOp), AllFlags() {
853 this->NonNegFlags = NonNegFlags;
854 }
855
856 VPIRFlags(ExactFlagsTy ExactFlags)
857 : OpType(OperationType::PossiblyExactOp), AllFlags() {
858 this->ExactFlags = ExactFlags;
859 }
860
862 : OpType(OperationType::GEPOp), AllFlags() {
863 GEPFlagsStorage = GEPFlags.getRaw();
864 }
865
866 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
867 : OpType(OperationType::ReductionOp), AllFlags() {
868 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
869 }
870
872 OpType = Other.OpType;
873 AllFlags[0] = Other.AllFlags[0];
874 AllFlags[1] = Other.AllFlags[1];
875 }
876
877 /// Only keep flags also present in \p Other. \p Other must have the same
878 /// OpType as the current object.
879 void intersectFlags(const VPIRFlags &Other);
880
881 /// Drop all poison-generating flags.
883 // NOTE: This needs to be kept in-sync with
884 // Instruction::dropPoisonGeneratingFlags.
885 switch (OpType) {
886 case OperationType::OverflowingBinOp:
887 WrapFlags.HasNUW = false;
888 WrapFlags.HasNSW = false;
889 break;
890 case OperationType::Trunc:
891 TruncFlags.HasNUW = false;
892 TruncFlags.HasNSW = false;
893 break;
894 case OperationType::DisjointOp:
895 DisjointFlags.IsDisjoint = false;
896 break;
897 case OperationType::PossiblyExactOp:
898 ExactFlags.IsExact = false;
899 break;
900 case OperationType::GEPOp:
901 GEPFlagsStorage = 0;
902 break;
903 case OperationType::FPMathOp:
904 case OperationType::FCmp:
905 case OperationType::ReductionOp:
906 getFMFsRef().NoNaNs = false;
907 getFMFsRef().NoInfs = false;
908 break;
909 case OperationType::NonNegOp:
910 NonNegFlags.NonNeg = false;
911 break;
912 case OperationType::Cmp:
913 case OperationType::Other:
914 break;
915 }
916 }
917
918 /// Apply the IR flags to \p I.
919 void applyFlags(Instruction &I) const {
920 switch (OpType) {
921 case OperationType::OverflowingBinOp:
922 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
923 I.setHasNoSignedWrap(WrapFlags.HasNSW);
924 break;
925 case OperationType::Trunc:
926 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
927 I.setHasNoSignedWrap(TruncFlags.HasNSW);
928 break;
929 case OperationType::DisjointOp:
930 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
931 break;
932 case OperationType::PossiblyExactOp:
933 I.setIsExact(ExactFlags.IsExact);
934 break;
935 case OperationType::GEPOp:
936 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
938 break;
939 case OperationType::FPMathOp:
940 case OperationType::FCmp: {
941 const FastMathFlagsTy &F = getFMFsRef();
942 I.setHasAllowReassoc(F.AllowReassoc);
943 I.setHasNoNaNs(F.NoNaNs);
944 I.setHasNoInfs(F.NoInfs);
945 I.setHasNoSignedZeros(F.NoSignedZeros);
946 I.setHasAllowReciprocal(F.AllowReciprocal);
947 I.setHasAllowContract(F.AllowContract);
948 I.setHasApproxFunc(F.ApproxFunc);
949 break;
950 }
951 case OperationType::NonNegOp:
952 I.setNonNeg(NonNegFlags.NonNeg);
953 break;
954 case OperationType::ReductionOp:
955 llvm_unreachable("reduction ops should not use applyFlags");
956 case OperationType::Cmp:
957 case OperationType::Other:
958 break;
959 }
960 }
961
963 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
964 "recipe doesn't have a compare predicate");
965 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
968 }
969
971 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
972 "recipe doesn't have a compare predicate");
973 if (OpType == OperationType::FCmp)
975 else
977 assert(getPredicate() == Pred && "predicate truncated");
978 }
979
983
984 /// Returns true if the recipe has a comparison predicate.
985 bool hasPredicate() const {
986 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
987 }
988
989 /// Returns true if the recipe has fast-math flags.
990 bool hasFastMathFlags() const {
991 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
992 OpType == OperationType::ReductionOp;
993 }
994
996
997 /// Returns true if the recipe has non-negative flag.
998 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
999
1000 bool isNonNeg() const {
1001 assert(OpType == OperationType::NonNegOp &&
1002 "recipe doesn't have a NNEG flag");
1003 return NonNegFlags.NonNeg;
1004 }
1005
1006 bool hasNoUnsignedWrap() const {
1007 switch (OpType) {
1008 case OperationType::OverflowingBinOp:
1009 return WrapFlags.HasNUW;
1010 case OperationType::Trunc:
1011 return TruncFlags.HasNUW;
1012 default:
1013 llvm_unreachable("recipe doesn't have a NUW flag");
1014 }
1015 }
1016
1017 bool hasNoSignedWrap() const {
1018 switch (OpType) {
1019 case OperationType::OverflowingBinOp:
1020 return WrapFlags.HasNSW;
1021 case OperationType::Trunc:
1022 return TruncFlags.HasNSW;
1023 default:
1024 llvm_unreachable("recipe doesn't have a NSW flag");
1025 }
1026 }
1027
1028 bool hasNoWrapFlags() const {
1029 switch (OpType) {
1030 case OperationType::OverflowingBinOp:
1031 case OperationType::Trunc:
1032 return true;
1033 default:
1034 return false;
1035 }
1036 }
1037
1039 return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1040 }
1041
1042 bool isDisjoint() const {
1043 assert(OpType == OperationType::DisjointOp &&
1044 "recipe cannot have a disjoing flag");
1045 return DisjointFlags.IsDisjoint;
1046 }
1047
1049 assert(OpType == OperationType::ReductionOp &&
1050 "recipe doesn't have reduction flags");
1051 return static_cast<RecurKind>(ReductionFlags.Kind);
1052 }
1053
1054 bool isReductionOrdered() const {
1055 assert(OpType == OperationType::ReductionOp &&
1056 "recipe doesn't have reduction flags");
1057 return ReductionFlags.IsOrdered;
1058 }
1059
1060 bool isReductionInLoop() const {
1061 assert(OpType == OperationType::ReductionOp &&
1062 "recipe doesn't have reduction flags");
1063 return ReductionFlags.IsInLoop;
1064 }
1065
1066private:
1067 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1068 FastMathFlagsTy &getFMFsRef() {
1069 if (OpType == OperationType::FCmp)
1070 return FCmpFlags.FMFs;
1071 if (OpType == OperationType::ReductionOp)
1072 return ReductionFlags.FMFs;
1073 return FMFs;
1074 }
1075 const FastMathFlagsTy &getFMFsRef() const {
1076 if (OpType == OperationType::FCmp)
1077 return FCmpFlags.FMFs;
1078 if (OpType == OperationType::ReductionOp)
1079 return ReductionFlags.FMFs;
1080 return FMFs;
1081 }
1082
1083public:
1084 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1085 /// otherwise. Opcodes not supporting default flags include compares and
1086 /// ComputeReductionResult.
1087 static VPIRFlags getDefaultFlags(unsigned Opcode);
1088
1089#if !defined(NDEBUG)
1090 /// Returns true if the set flags are valid for \p Opcode.
1091 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1092
1093 /// Returns true if \p Opcode has its required flags set.
1094 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1095#endif
1096
1097#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1098 void printFlags(raw_ostream &O) const;
1099#endif
1100};
1102
1103static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1104
1105/// A pure-virtual common base class for recipes defining a single VPValue and
1106/// using IR flags.
1108 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1109 const VPIRFlags &Flags,
1111 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1112
1113 static inline bool classof(const VPRecipeBase *R) {
1114 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1115 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1116 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1117 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1118 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1119 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1120 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1121 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1122 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1123 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1124 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1125 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC;
1126 }
1127
1128 static inline bool classof(const VPUser *U) {
1129 auto *R = dyn_cast<VPRecipeBase>(U);
1130 return R && classof(R);
1131 }
1132
1133 static inline bool classof(const VPValue *V) {
1134 auto *R = V->getDefiningRecipe();
1135 return R && classof(R);
1136 }
1137
1139
1140 static inline bool classof(const VPSingleDefRecipe *R) {
1141 return classof(static_cast<const VPRecipeBase *>(R));
1142 }
1143
1144 void execute(VPTransformState &State) override = 0;
1145
1146 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1148 VPCostContext &Ctx) const;
1149};
1150
1151/// Helper to access the operand that contains the unroll part for this recipe
1152/// after unrolling.
template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
protected:
  // NOTE(review): PartOpIdx appears to be the operand index at which the
  // unroll-part operand is expected — confirm against the implementation.

  /// Return the VPValue operand containing the unroll part or null if there is
  /// no such operand.
  VPValue *getUnrollPartOperand(const VPUser &U) const;

  /// Return the unroll part.
  unsigned getUnrollPart(const VPUser &U) const;
};
1162
1163/// Helper to manage IR metadata for recipes. It filters out metadata that
1164/// cannot be propagated.
1167
1168public:
1169 VPIRMetadata() = default;
1170
  /// Adds metadata that can be preserved from the original instruction
  /// \p I.
1174
1175 /// Copy constructor for cloning.
1177
1179
1180 /// Add all metadata to \p I.
1181 void applyMetadata(Instruction &I) const;
1182
1183 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1184 /// already exists, it will be replaced. Otherwise, it will be added.
1185 void setMetadata(unsigned Kind, MDNode *Node) {
1186 auto It =
1187 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1188 return P.first == Kind;
1189 });
1190 if (It != Metadata.end())
1191 It->second = Node;
1192 else
1193 Metadata.emplace_back(Kind, Node);
1194 }
1195
1196 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1197 /// nodes that are common to both.
1198 void intersect(const VPIRMetadata &MD);
1199
1200 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1201 MDNode *getMetadata(unsigned Kind) const {
1202 auto It =
1203 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1204 return It != Metadata.end() ? It->second : nullptr;
1205 }
1206
1207#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1208 /// Print metadata with node IDs.
1209 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1210#endif
1211};
1212
1213/// This is a concrete Recipe that models a single VPlan-level instruction.
1214/// While as any Recipe it may generate a sequence of IR instructions when
1215/// executed, these instructions would always form a single-def expression as
1216/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1217/// opcodes can take an optional mask. Masks may be assigned during
1218/// predication.
1220 public VPIRMetadata {
1221public:
1222 /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
1223 enum {
1225 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1226 // values of a first-order recurrence.
1228 // Creates a mask where each lane is active (true) whilst the current
1229 // counter (first operand + index) is less than the second operand. i.e.
1230 // mask[i] = icmpt ult (op0 + i), op1
1231 // The size of the mask returned is VF * Multiplier (UF, third op).
1235 // Increment the canonical IV separately for each unrolled part.
1237 // Abstract instruction that compares two values and branches. This is
1238 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1241 // Branch with 2 boolean condition operands and 3 successors. If condition
1242 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1243 // successor 1; otherwise branches to successor 2. Expanded after region
1244 // dissolution into: (1) an OR of the two conditions branching to
1245 // middle.split or successor 2, and (2) middle.split branching to successor
1246 // 0 or successor 1 based on condition 0.
1249 /// Given operands of (the same) struct type, creates a struct of fixed-
1250 /// width vectors each containing a struct field of all operands. The
1251 /// number of operands matches the element count of every vector.
1253 /// Creates a fixed-width vector containing all operands. The number of
1254 /// operands matches the vector element count.
1256 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1257 /// abstract VPInstruction whose single defined VPValue represents VF
1258 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1259 /// VPInstructions.
1261 /// Reduce the operands to the final reduction result using the operation
1262 /// specified via the operation's VPIRFlags.
1264 // Extracts the last part of its operand. Removed during unrolling.
1266 // Extracts the last lane of its vector operand, per part.
1268 // Extracts the second-to-last lane from its operand or the second-to-last
1269 // part if it is scalar. In the latter case, the recipe will be removed
1270 // during unrolling.
1272 LogicalAnd, // Non-poison propagating logical And.
1273 LogicalOr, // Non-poison propagating logical Or.
1274 // Add an offset in bytes (second operand) to a base pointer (first
1275 // operand). Only generates scalar values (either for the first lane only or
1276 // for all lanes, depending on its uses).
1278 // Add a vector offset in bytes (second operand) to a scalar base pointer
1279 // (first operand).
1281 // Returns a scalar boolean value, which is true if any lane of its
1282 // (boolean) vector operands is true. It produces the reduced value across
1283 // all unrolled iterations. Unrolling will add all copies of its original
1284 // operand as additional operands. AnyOf is poison-safe as all operands
1285 // will be frozen.
1287 // Calculates the first active lane index of the vector predicate operands.
1288 // It produces the lane index across all unrolled iterations. Unrolling will
1289 // add all copies of its original operand as additional operands.
1290 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1291 // result even with operands that are all zeroes.
1293 // Calculates the last active lane index of the vector predicate operands.
1294 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1295 // tail-folding to extract the correct live-out value from the last active
1296 // iteration. It produces the lane index across all unrolled iterations.
1297 // Unrolling will add all copies of its original operand as additional
1298 // operands.
1300 // Returns a reversed vector for the operand.
1302
1303 // The opcodes below are used for VPInstructionWithType.
1304 //
1305 /// Scale the first operand (vector step) by the second operand
1306 /// (scalar-step). Casts both operands to the result type if needed.
1308 /// Start vector for reductions with 3 operands: the original start value,
1309 /// the identity value for the reduction and an integer indicating the
1310 /// scaling factor.
1312 // Creates a step vector starting from 0 to VF with a step of 1.
1314 /// Extracts a single lane (first operand) from a set of vector operands.
1315 /// The lane specifies an index into a vector formed by combining all vector
1316 /// operands (all operands after the first one).
1318 /// Explicit user for the resume phi of the canonical induction in the main
1319 /// VPlan, used by the epilogue vector loop.
1321 /// Extracts the last active lane from a set of vectors. The first operand
1322 /// is the default value if no lanes in the masks are active. Conceptually,
1323 /// this concatenates all data vectors (odd operands), concatenates all
1324 /// masks (even operands -- ignoring the default value), and returns the
1325 /// last active value from the combined data vector using the combined mask.
1327
1328 /// Returns the value for vscale.
1330 /// Compute the exiting value of a wide induction after vectorization, that
1331 /// is the value of the last lane of the induction increment (i.e. its
1332 /// backedge value). Has the wide induction recipe as operand.
1336 };
1337
1338 /// Returns true if this VPInstruction generates scalar values for all lanes.
1339 /// Most VPInstructions generate a single value per part, either vector or
1340 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1341 /// values per all lanes, stemming from an original ingredient. This method
1342 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1343 /// underlying ingredient.
1344 bool doesGeneratePerAllLanes() const;
1345
1346 /// Return the number of operands determined by the opcode of the
1347 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1348 /// cannot be determined directly by the opcode.
1349 unsigned getNumOperandsForOpcode() const;
1350
1351private:
1352 typedef unsigned char OpcodeTy;
1353 OpcodeTy Opcode;
1354
1355 /// An optional name that can be used for the generated IR instruction.
1356 std::string Name;
1357
1358 /// Returns true if we can generate a scalar for the first lane only if
1359 /// needed.
1360 bool canGenerateScalarForFirstLane() const;
1361
  /// Utility method serving execute(): generates a single vector instance of
  /// the modeled instruction. \returns the generated value. In some cases an
  /// existing value is returned rather than a generated one.
1365 Value *generate(VPTransformState &State);
1366
1367 /// Returns true if the VPInstruction does not need masking.
1368 bool alwaysUnmasked() const {
1369 if (Opcode == VPInstruction::MaskedCond)
1370 return false;
1371
1372 // For now only VPInstructions with underlying values use masks.
1373 // TODO: provide masks to VPInstructions w/o underlying values.
1374 if (!getUnderlyingValue())
1375 return true;
1376
1377 return Opcode == Instruction::PHI || Opcode == Instruction::GetElementPtr;
1378 }
1379
1380public:
1381 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1382 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1383 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1384
1385 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1386
1387 VPInstruction *clone() override {
1388 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1389 getDebugLoc(), Name);
1390 if (getUnderlyingValue())
1391 New->setUnderlyingValue(getUnderlyingInstr());
1392 return New;
1393 }
1394
1395 unsigned getOpcode() const { return Opcode; }
1396
1397 /// Generate the instruction.
1398 /// TODO: We currently execute only per-part unless a specific instance is
1399 /// provided.
1400 void execute(VPTransformState &State) override;
1401
1402 /// Return the cost of this VPInstruction.
1403 InstructionCost computeCost(ElementCount VF,
1404 VPCostContext &Ctx) const override;
1405
1406#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1407 /// Print the VPInstruction to dbgs() (for debugging).
1408 LLVM_DUMP_METHOD void dump() const;
1409#endif
1410
1411 bool hasResult() const {
1412 // CallInst may or may not have a result, depending on the called function.
1413 // Conservatively return calls have results for now.
1414 switch (getOpcode()) {
1415 case Instruction::Ret:
1416 case Instruction::UncondBr:
1417 case Instruction::CondBr:
1418 case Instruction::Store:
1419 case Instruction::Switch:
1420 case Instruction::IndirectBr:
1421 case Instruction::Resume:
1422 case Instruction::CatchRet:
1423 case Instruction::Unreachable:
1424 case Instruction::Fence:
1425 case Instruction::AtomicRMW:
1429 return false;
1430 default:
1431 return true;
1432 }
1433 }
1434
1435 /// Returns true if the VPInstruction has a mask operand.
1436 bool isMasked() const {
1437 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1438 // VPInstructions without a fixed number of operands cannot be masked.
1439 if (NumOpsForOpcode == -1u)
1440 return false;
1441 return NumOpsForOpcode + 1 == getNumOperands();
1442 }
1443
1444 /// Returns the number of operands, excluding the mask if the VPInstruction is
1445 /// masked.
1446 unsigned getNumOperandsWithoutMask() const {
1447 return getNumOperands() - isMasked();
1448 }
1449
1450 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1451 void addMask(VPValue *Mask) {
1452 assert(!isMasked() && "recipe is already masked");
1453 if (alwaysUnmasked())
1454 return;
1455 addOperand(Mask);
1456 }
1457
1458 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1459 /// VPInstructions.
1460 VPValue *getMask() const {
1461 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1462 }
1463
1464 /// Returns an iterator range over the operands excluding the mask operand
1465 /// if present.
1472
1473 /// Returns true if the underlying opcode may read from or write to memory.
1474 bool opcodeMayReadOrWriteFromMemory() const;
1475
1476 /// Returns true if the recipe only uses the first lane of operand \p Op.
1477 bool usesFirstLaneOnly(const VPValue *Op) const override;
1478
1479 /// Returns true if the recipe only uses the first part of operand \p Op.
1480 bool usesFirstPartOnly(const VPValue *Op) const override;
1481
1482 /// Returns true if this VPInstruction produces a scalar value from a vector,
1483 /// e.g. by performing a reduction or extracting a lane.
1484 bool isVectorToScalar() const;
1485
1486 /// Returns true if this VPInstruction's operands are single scalars and the
1487 /// result is also a single scalar.
1488 bool isSingleScalar() const;
1489
1490 /// Returns the symbolic name assigned to the VPInstruction.
1491 StringRef getName() const { return Name; }
1492
1493 /// Set the symbolic name for the VPInstruction.
1494 void setName(StringRef NewName) { Name = NewName.str(); }
1495
1496protected:
1497#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1498 /// Print the VPInstruction to \p O.
1499 void printRecipe(raw_ostream &O, const Twine &Indent,
1500 VPSlotTracker &SlotTracker) const override;
1501#endif
1502};
1503
1504/// A specialization of VPInstruction augmenting it with a dedicated result
1505/// type, to be used when the opcode and operands of the VPInstruction don't
1506/// directly determine the result type. Note that there is no separate recipe ID
1507/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1508/// distinguished purely by the opcode.
1510 /// Scalar result type produced by the recipe.
1511 Type *ResultTy;
1512
1513public:
1515 Type *ResultTy, const VPIRFlags &Flags = {},
1516 const VPIRMetadata &Metadata = {},
1518 const Twine &Name = "")
1519 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1520 ResultTy(ResultTy) {}
1521
1522 static inline bool classof(const VPRecipeBase *R) {
1523 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1524 // type information.
1525 if (R->isScalarCast())
1526 return true;
1527 auto *VPI = dyn_cast<VPInstruction>(R);
1528 if (!VPI)
1529 return false;
1530 switch (VPI->getOpcode()) {
1534 case Instruction::Load:
1535 return true;
1536 default:
1537 return false;
1538 }
1539 }
1540
1541 static inline bool classof(const VPUser *R) {
1543 }
1544
1545 VPInstruction *clone() override {
1546 auto *New =
1548 *this, *this, getDebugLoc(), getName());
1549 New->setUnderlyingValue(getUnderlyingValue());
1550 return New;
1551 }
1552
1553 void execute(VPTransformState &State) override;
1554
1555 /// Return the cost of this VPInstruction.
1557 VPCostContext &Ctx) const override {
1558 // TODO: Compute accurate cost after retiring the legacy cost model.
1559 return 0;
1560 }
1561
1562 Type *getResultType() const { return ResultTy; }
1563
1564protected:
1565#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1566 /// Print the recipe.
1567 void printRecipe(raw_ostream &O, const Twine &Indent,
1568 VPSlotTracker &SlotTracker) const override;
1569#endif
1570};
1571
1572/// Helper type to provide functions to access incoming values and blocks for
1573/// phi-like recipes.
1575protected:
1576 /// Return a VPRecipeBase* to the current object.
1577 virtual const VPRecipeBase *getAsRecipe() const = 0;
1578
1579public:
1580 virtual ~VPPhiAccessors() = default;
1581
1582 /// Returns the incoming VPValue with index \p Idx.
1583 VPValue *getIncomingValue(unsigned Idx) const {
1584 return getAsRecipe()->getOperand(Idx);
1585 }
1586
1587 /// Returns the incoming block with index \p Idx.
1588 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1589
1590 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1591 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1592
1593 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1594 /// block.
1595 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1596
1597 /// Returns the number of incoming values, also number of incoming blocks.
1598 virtual unsigned getNumIncoming() const {
1599 return getAsRecipe()->getNumOperands();
1600 }
1601
1602 /// Returns an interator range over the incoming values.
1604 return make_range(getAsRecipe()->op_begin(),
1605 getAsRecipe()->op_begin() + getNumIncoming());
1606 }
1607
1609 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1610
1611 /// Returns an iterator range over the incoming blocks.
1613 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1614 return getIncomingBlock(Idx);
1615 };
1616 return map_range(index_range(0, getNumIncoming()), GetBlock);
1617 }
1618
1619 /// Returns an iterator range over pairs of incoming values and corresponding
1620 /// incoming blocks.
1626
1627 /// Removes the incoming value for \p IncomingBlock, which must be a
1628 /// predecessor.
1629 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1630
1631#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1632 /// Print the recipe.
1634#endif
1635};
1636
1639 const Twine &Name = "")
1640 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name) {}
1641
1642 static inline bool classof(const VPUser *U) {
1643 auto *VPI = dyn_cast<VPInstruction>(U);
1644 return VPI && VPI->getOpcode() == Instruction::PHI;
1645 }
1646
1647 static inline bool classof(const VPValue *V) {
1648 auto *VPI = dyn_cast<VPInstruction>(V);
1649 return VPI && VPI->getOpcode() == Instruction::PHI;
1650 }
1651
1652 static inline bool classof(const VPSingleDefRecipe *SDR) {
1653 auto *VPI = dyn_cast<VPInstruction>(SDR);
1654 return VPI && VPI->getOpcode() == Instruction::PHI;
1655 }
1656
1657 VPPhi *clone() override {
1658 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1659 PhiR->setUnderlyingValue(getUnderlyingValue());
1660 return PhiR;
1661 }
1662
1663 void execute(VPTransformState &State) override;
1664
1665protected:
1666#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1667 /// Print the recipe.
1668 void printRecipe(raw_ostream &O, const Twine &Indent,
1669 VPSlotTracker &SlotTracker) const override;
1670#endif
1671
1672 const VPRecipeBase *getAsRecipe() const override { return this; }
1673};
1674
/// A recipe to wrap an original IR instruction not to be modified during
/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
/// Except for PHIs, VPIRInstructions cannot have any operands.
1679 Instruction &I;
1680
1681protected:
1682 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1683 /// subclasses may need to be created, e.g. VPIRPhi.
1685 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1686
1687public:
1688 ~VPIRInstruction() override = default;
1689
1690 /// Create a new VPIRPhi for \p \I, if it is a PHINode, otherwise create a
1691 /// VPIRInstruction.
1693
1694 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1695
1697 auto *R = create(I);
1698 for (auto *Op : operands())
1699 R->addOperand(Op);
1700 return R;
1701 }
1702
1703 void execute(VPTransformState &State) override;
1704
1705 /// Return the cost of this VPIRInstruction.
1707 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1708
1709 Instruction &getInstruction() const { return I; }
1710
1711 bool usesScalars(const VPValue *Op) const override {
1713 "Op must be an operand of the recipe");
1714 return true;
1715 }
1716
1717 bool usesFirstPartOnly(const VPValue *Op) const override {
1719 "Op must be an operand of the recipe");
1720 return true;
1721 }
1722
1723 bool usesFirstLaneOnly(const VPValue *Op) const override {
1725 "Op must be an operand of the recipe");
1726 return true;
1727 }
1728
1729protected:
1730#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1731 /// Print the recipe.
1732 void printRecipe(raw_ostream &O, const Twine &Indent,
1733 VPSlotTracker &SlotTracker) const override;
1734#endif
1735};
1736
1737/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1738/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1739/// allowed, and it is used to add a new incoming value for the single
1740/// predecessor VPBB.
1742 public VPPhiAccessors {
1744
1745 static inline bool classof(const VPRecipeBase *U) {
1746 auto *R = dyn_cast<VPIRInstruction>(U);
1747 return R && isa<PHINode>(R->getInstruction());
1748 }
1749
1750 static inline bool classof(const VPUser *U) {
1751 auto *R = dyn_cast<VPRecipeBase>(U);
1752 return R && classof(R);
1753 }
1754
1756
1757 void execute(VPTransformState &State) override;
1758
1759protected:
1760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1761 /// Print the recipe.
1762 void printRecipe(raw_ostream &O, const Twine &Indent,
1763 VPSlotTracker &SlotTracker) const override;
1764#endif
1765
1766 const VPRecipeBase *getAsRecipe() const override { return this; }
1767};
1768
1769/// VPWidenRecipe is a recipe for producing a widened instruction using the
1770/// opcode and operands of the recipe. This recipe covers most of the
1771/// traditional vectorization cases where each recipe transforms into a
1772/// vectorized version of itself.
1774 public VPIRMetadata {
1775 unsigned Opcode;
1776
1777public:
1779 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1780 DebugLoc DL = {})
1781 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1782 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1783 setUnderlyingValue(&I);
1784 }
1785
  /// Construct a widening recipe for \p Opcode over \p Operands without an
  /// underlying IR instruction, with optional IR \p Flags, \p Metadata and
  /// debug location \p DL.
  VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
                const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
                DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
        VPIRMetadata(Metadata), Opcode(Opcode) {}
1791
1792 ~VPWidenRecipe() override = default;
1793
1794 VPWidenRecipe *clone() override {
1795 if (auto *UV = getUnderlyingValue())
1796 return new VPWidenRecipe(*cast<Instruction>(UV), operands(), *this, *this,
1797 getDebugLoc());
1798 return new VPWidenRecipe(Opcode, operands(), *this, *this, getDebugLoc());
1799 }
1800
1801 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1802
1803 /// Produce a widened instruction using the opcode and operands of the recipe,
1804 /// processing State.VF elements.
1805 void execute(VPTransformState &State) override;
1806
1807 /// Return the cost of this VPWidenRecipe.
1808 InstructionCost computeCost(ElementCount VF,
1809 VPCostContext &Ctx) const override;
1810
1811 unsigned getOpcode() const { return Opcode; }
1812
1813protected:
1814#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1815 /// Print the recipe.
1816 void printRecipe(raw_ostream &O, const Twine &Indent,
1817 VPSlotTracker &SlotTracker) const override;
1818#endif
1819
1820 /// Returns true if the recipe only uses the first lane of operand \p Op.
1821 bool usesFirstLaneOnly(const VPValue *Op) const override {
1823 "Op must be an operand of the recipe");
1824 return Opcode == Instruction::Select && Op == getOperand(0) &&
1825 Op->isDefinedOutsideLoopRegions();
1826 }
1827};
1828
1829/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1831 /// Cast instruction opcode.
1832 Instruction::CastOps Opcode;
1833
1834 /// Result type for the cast.
1835 Type *ResultTy;
1836
1837public:
1839 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1840 const VPIRMetadata &Metadata = {},
1842 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, Flags, DL),
1843 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1844 assert(flagsValidForOpcode(Opcode) &&
1845 "Set flags not supported for the provided opcode");
1847 "Opcode requires specific flags to be set");
1849 }
1850
1851 ~VPWidenCastRecipe() override = default;
1852
1854 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1856 *this, *this, getDebugLoc());
1857 }
1858
1859 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1860
1861 /// Produce widened copies of the cast.
1862 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1863
1864 /// Return the cost of this VPWidenCastRecipe.
1866 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1867
1868 Instruction::CastOps getOpcode() const { return Opcode; }
1869
1870 /// Returns the result type of the cast.
1871 Type *getResultType() const { return ResultTy; }
1872
1873protected:
1874#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1875 /// Print the recipe.
1876 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1877 VPSlotTracker &SlotTracker) const override;
1878#endif
1879};
1880
1881/// A recipe for widening vector intrinsics.
1883 /// ID of the vector intrinsic to widen.
1884 Intrinsic::ID VectorIntrinsicID;
1885
1886 /// Scalar return type of the intrinsic.
1887 Type *ResultTy;
1888
1889 /// True if the intrinsic may read from memory.
1890 bool MayReadFromMemory;
1891
  /// True if the intrinsic may write to memory.
1893 bool MayWriteToMemory;
1894
1895 /// True if the intrinsic may have side-effects.
1896 bool MayHaveSideEffects;
1897
1898public:
1900 ArrayRef<VPValue *> CallArguments, Type *Ty,
1901 const VPIRFlags &Flags = {},
1902 const VPIRMetadata &MD = {},
1904 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1905 Flags, DL),
1906 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1907 MayReadFromMemory(CI.mayReadFromMemory()),
1908 MayWriteToMemory(CI.mayWriteToMemory()),
1909 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1910 setUnderlyingValue(&CI);
1911 }
1912
1914 ArrayRef<VPValue *> CallArguments, Type *Ty,
1915 const VPIRFlags &Flags = {},
1916 const VPIRMetadata &Metadata = {},
1918 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1919 Flags, DL),
1920 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1921 ResultTy(Ty) {
1922 LLVMContext &Ctx = Ty->getContext();
1923 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1924 MemoryEffects ME = Attrs.getMemoryEffects();
1925 MayReadFromMemory = !ME.onlyWritesMemory();
1926 MayWriteToMemory = !ME.onlyReadsMemory();
1927 MayHaveSideEffects = MayWriteToMemory ||
1928 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1929 !Attrs.hasAttribute(Attribute::WillReturn);
1930 }
1931
1932 ~VPWidenIntrinsicRecipe() override = default;
1933
1935 if (Value *CI = getUnderlyingValue())
1936 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1937 operands(), ResultTy, *this, *this,
1938 getDebugLoc());
1939 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1940 *this, *this, getDebugLoc());
1941 }
1942
1943 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntrinsicSC)
1944
1945 /// Produce a widened version of the vector intrinsic.
1946 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1947
1948 /// Compute the cost of a vector intrinsic with \p ID and \p Operands.
1951 const VPRecipeWithIRFlags &R,
1952 ElementCount VF, VPCostContext &Ctx);
1953
1954 /// Return the cost of this vector intrinsic.
1956 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1957
1958 /// Return the ID of the intrinsic.
1959 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1960
1961 /// Return the scalar return type of the intrinsic.
1962 Type *getResultType() const { return ResultTy; }
1963
 1964 /// Return the name of the intrinsic as a string.
1966
1967 /// Returns true if the intrinsic may read from memory.
1968 bool mayReadFromMemory() const { return MayReadFromMemory; }
1969
1970 /// Returns true if the intrinsic may write to memory.
1971 bool mayWriteToMemory() const { return MayWriteToMemory; }
1972
1973 /// Returns true if the intrinsic may have side-effects.
1974 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1975
1976 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1977
1978protected:
1979#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1980 /// Print the recipe.
1981 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1982 VPSlotTracker &SlotTracker) const override;
1983#endif
1984};
1985
1986/// A recipe for widening Call instructions using library calls.
1988 public VPIRMetadata {
1989 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1990 /// between a given VF and the chosen vectorized variant, so there will be a
1991 /// different VPlan for each VF with a valid variant.
1992 Function *Variant;
1993
1994public:
1996 ArrayRef<VPValue *> CallArguments,
1997 const VPIRFlags &Flags = {},
1998 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1999 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments, Flags,
2000 DL),
2001 VPIRMetadata(Metadata), Variant(Variant) {
2002 setUnderlyingValue(UV);
2003 assert(
2004 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
2005 "last operand must be the called function");
2006 }
2007
2008 ~VPWidenCallRecipe() override = default;
2009
2011 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
2012 *this, *this, getDebugLoc());
2013 }
2014
2015 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
2016
2017 /// Produce a widened version of the call instruction.
2018 void execute(VPTransformState &State) override;
2019
2020 /// Return the cost of this VPWidenCallRecipe.
2021 InstructionCost computeCost(ElementCount VF,
2022 VPCostContext &Ctx) const override;
2023
2024 /// Return the cost of widening a call using the vector function \p Variant.
2025 static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx);
2026
2030
2033
2034 /// Returns true if the recipe only uses the first lane of operand \p Op.
2035 bool usesFirstLaneOnly(const VPValue *Op) const override;
2036
2037protected:
2038#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2039 /// Print the recipe.
2040 void printRecipe(raw_ostream &O, const Twine &Indent,
2041 VPSlotTracker &SlotTracker) const override;
2042#endif
2043};
2044
2045/// A recipe representing a sequence of load -> update -> store as part of
2046/// a histogram operation. This means there may be aliasing between vector
2047/// lanes, which is handled by the llvm.experimental.vector.histogram family
2048/// of intrinsics. The only update operations currently supported are
2049/// 'add' and 'sub' where the other term is loop-invariant.
2051 /// Opcode of the update operation, currently either add or sub.
2052 unsigned Opcode;
2053
2054public:
2055 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2057 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2058 Opcode(Opcode) {}
2059
2060 ~VPHistogramRecipe() override = default;
2061
2063 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
2064 }
2065
2066 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2067
2068 /// Produce a vectorized histogram operation.
2069 void execute(VPTransformState &State) override;
2070
2071 /// Return the cost of this VPHistogramRecipe.
2073 VPCostContext &Ctx) const override;
2074
2075 unsigned getOpcode() const { return Opcode; }
2076
2077 /// Return the mask operand if one was provided, or a null pointer if all
2078 /// lanes should be executed unconditionally.
2079 VPValue *getMask() const {
2080 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2081 }
2082
2083protected:
2084#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2085 /// Print the recipe
2086 void printRecipe(raw_ostream &O, const Twine &Indent,
2087 VPSlotTracker &SlotTracker) const override;
2088#endif
2089};
2090
2091/// A recipe for handling GEP instructions.
2093 Type *SourceElementTy;
2094
2095 bool isPointerLoopInvariant() const {
2096 return getOperand(0)->isDefinedOutsideLoopRegions();
2097 }
2098
2099 bool isIndexLoopInvariant(unsigned I) const {
2100 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
2101 }
2102
2103public:
2105 const VPIRFlags &Flags = {},
2107 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands, Flags, DL),
2108 SourceElementTy(GEP->getSourceElementType()) {
2109 setUnderlyingValue(GEP);
2111 (void)Metadata;
2113 assert(Metadata.empty() && "unexpected metadata on GEP");
2114 }
2115
2116 ~VPWidenGEPRecipe() override = default;
2117
2120 operands(), *this, getDebugLoc());
2121 }
2122
2123 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2124
2125 /// This recipe generates a GEP instruction.
2126 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2127
2128 /// Generate the gep nodes.
2129 void execute(VPTransformState &State) override;
2130
2131 Type *getSourceElementType() const { return SourceElementTy; }
2132
2133 /// Return the cost of this VPWidenGEPRecipe.
2135 VPCostContext &Ctx) const override {
2136 // TODO: Compute accurate cost after retiring the legacy cost model.
2137 return 0;
2138 }
2139
2140 /// Returns true if the recipe only uses the first lane of operand \p Op.
2141 bool usesFirstLaneOnly(const VPValue *Op) const override;
2142
2143protected:
2144#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2145 /// Print the recipe.
2146 void printRecipe(raw_ostream &O, const Twine &Indent,
2147 VPSlotTracker &SlotTracker) const override;
2148#endif
2149};
2150
2151/// A recipe to compute a pointer to the last element of each part of a widened
2152/// memory access for widened memory accesses of SourceElementTy. Used for
2153/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2154/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2155/// unroller otherwise.
2157 Type *SourceElementTy;
2158
2159 /// The constant stride of the pointer computed by this recipe, expressed in
2160 /// units of SourceElementTy.
2161 int64_t Stride;
2162
2163public:
2164 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2165 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2166 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2167 GEPFlags, DL),
2168 SourceElementTy(SourceElementTy), Stride(Stride) {
2169 assert(Stride < 0 && "Stride must be negative");
2170 }
2171
2172 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2173
2174 Type *getSourceElementType() const { return SourceElementTy; }
2175 int64_t getStride() const { return Stride; }
2176 VPValue *getPointer() const { return getOperand(0); }
2177 VPValue *getVFValue() const { return getOperand(1); }
2179 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2180 }
2181
2182 /// Adds the offset operand to the recipe.
2183 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2184 void materializeOffset(unsigned Part = 0);
2185
2186 void execute(VPTransformState &State) override;
2187
2188 bool usesFirstLaneOnly(const VPValue *Op) const override {
2190 "Op must be an operand of the recipe");
2191 return true;
2192 }
2193
 2194 /// Return the cost of this VPVectorEndPointerRecipe.
2196 VPCostContext &Ctx) const override {
2197 // TODO: Compute accurate cost after retiring the legacy cost model.
2198 return 0;
2199 }
2200
2201 /// Returns true if the recipe only uses the first part of operand \p Op.
2202 bool usesFirstPartOnly(const VPValue *Op) const override {
2204 "Op must be an operand of the recipe");
2205 assert(getNumOperands() <= 2 && "must have at most two operands");
2206 return true;
2207 }
2208
2210 auto *VEPR = new VPVectorEndPointerRecipe(
2213 if (auto *Offset = getOffset())
2214 VEPR->addOperand(Offset);
2215 return VEPR;
2216 }
2217
2218protected:
2219#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2220 /// Print the recipe.
2221 void printRecipe(raw_ostream &O, const Twine &Indent,
2222 VPSlotTracker &SlotTracker) const override;
2223#endif
2224};
2225
2226/// A recipe to compute the pointers for widened memory accesses of \p
2227/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
2228/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
2230 Type *SourceElementTy;
2231
2232public:
2233 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
2234 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2235 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC, Ptr, GEPFlags, DL),
2236 SourceElementTy(SourceElementTy) {}
2237
2238 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2239
2241 return getNumOperands() == 2 ? getOperand(1) : nullptr;
2242 }
2243
2244 void execute(VPTransformState &State) override;
2245
2246 Type *getSourceElementType() const { return SourceElementTy; }
2247
2248 bool usesFirstLaneOnly(const VPValue *Op) const override {
2250 "Op must be an operand of the recipe");
2251 return true;
2252 }
2253
2254 /// Returns true if the recipe only uses the first part of operand \p Op.
2255 bool usesFirstPartOnly(const VPValue *Op) const override {
2257 "Op must be an operand of the recipe");
2258 assert(getNumOperands() <= 2 && "must have at most two operands");
2259 return true;
2260 }
2261
2263 auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2265 if (auto *Off = getOffset())
2266 Clone->addOperand(Off);
2267 return Clone;
2268 }
2269
 2270 /// Return the cost of this VPVectorPointerRecipe.
2272 VPCostContext &Ctx) const override {
2273 // TODO: Compute accurate cost after retiring the legacy cost model.
2274 return 0;
2275 }
2276
2277protected:
2278#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2279 /// Print the recipe.
2280 void printRecipe(raw_ostream &O, const Twine &Indent,
2281 VPSlotTracker &SlotTracker) const override;
2282#endif
2283};
2284
2285/// A pure virtual base class for all recipes modeling header phis, including
2286/// phis for first order recurrences, pointer inductions and reductions. The
2287/// start value is the first operand of the recipe and the incoming value from
2288/// the backedge is the second operand.
2289///
2290/// Inductions are modeled using the following sub-classes:
2291/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2292/// floating point inductions with arbitrary start and step values. Produces
2293/// a vector PHI per-part.
2294/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2295/// pointer induction. Produces either a vector PHI per-part or scalar values
2296/// per-lane based on the canonical induction.
2297/// * VPFirstOrderRecurrencePHIRecipe
2298/// * VPReductionPHIRecipe
2299/// * VPActiveLaneMaskPHIRecipe
2300/// * VPEVLBasedIVPHIRecipe
2301///
2302/// Note that the canonical IV is modeled as a VPRegionValue associated with
2303/// its loop region.
2305 public VPPhiAccessors {
2306protected:
2307 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2308 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2309 : VPSingleDefRecipe(VPRecipeID, Start, UnderlyingInstr, DL) {}
2310
2311 const VPRecipeBase *getAsRecipe() const override { return this; }
2312
2313public:
2314 ~VPHeaderPHIRecipe() override = default;
2315
2316 /// Method to support type inquiry through isa, cast, and dyn_cast.
2317 static inline bool classof(const VPRecipeBase *R) {
2318 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2319 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2320 }
2321 static inline bool classof(const VPValue *V) {
2322 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2323 }
2324 static inline bool classof(const VPSingleDefRecipe *R) {
2325 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2326 }
2327
2328 /// Generate the phi nodes.
2329 void execute(VPTransformState &State) override = 0;
2330
2331 /// Return the cost of this header phi recipe.
2333 VPCostContext &Ctx) const override;
2334
2335 /// Returns the start value of the phi, if one is set.
2337 return getNumOperands() == 0 ? nullptr : getOperand(0);
2338 }
2340 return getNumOperands() == 0 ? nullptr : getOperand(0);
2341 }
2342
2343 /// Update the start value of the recipe.
2345
2346 /// Returns the incoming value from the loop backedge.
2348 return getOperand(1);
2349 }
2350
2351 /// Update the incoming value from the loop backedge.
2353
2354 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2355 /// to be a recipe.
2357 return *getBackedgeValue()->getDefiningRecipe();
2358 }
2359
2360protected:
2361#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2362 /// Print the recipe.
2363 void printRecipe(raw_ostream &O, const Twine &Indent,
2364 VPSlotTracker &SlotTracker) const override = 0;
2365#endif
2366};
2367
2368/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2369/// VPWidenPointerInductionRecipe), providing shared functionality, including
2370/// retrieving the step value, induction descriptor and original phi node.
2372 InductionDescriptor IndDesc;
2373
2374public:
2375 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2376 VPValue *Step, const InductionDescriptor &IndDesc,
2377 DebugLoc DL)
2378 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2379 addOperand(Step);
2380 }
2381
2382 static inline bool classof(const VPRecipeBase *R) {
2383 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2384 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2385 }
2386
2387 static inline bool classof(const VPValue *V) {
2388 auto *R = V->getDefiningRecipe();
2389 return R && classof(R);
2390 }
2391
2392 static inline bool classof(const VPSingleDefRecipe *R) {
2393 return classof(static_cast<const VPRecipeBase *>(R));
2394 }
2395
2396 void execute(VPTransformState &State) override = 0;
2397
2398 /// Returns the start value of the induction.
2400
2401 /// Returns the step value of the induction.
2403 const VPValue *getStepValue() const { return getOperand(1); }
2404
2405 /// Update the step value of the recipe.
2406 void setStepValue(VPValue *V) { setOperand(1, V); }
2407
2409 const VPValue *getVFValue() const { return getOperand(2); }
2410
2411 /// Returns the number of incoming values, also number of incoming blocks.
2412 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2413 /// incoming value, its start value.
2414 unsigned getNumIncoming() const override { return 1; }
2415
2416 /// Returns the underlying PHINode if one exists, or null otherwise.
2420
2421 /// Returns the induction descriptor for the recipe.
2422 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2423
2425 // TODO: All operands of base recipe must exist and be at same index in
2426 // derived recipe.
2428 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2429 }
2430
2432 // TODO: All operands of base recipe must exist and be at same index in
2433 // derived recipe.
2435 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2436 }
2437
2438 /// Returns true if the recipe only uses the first lane of operand \p Op.
2439 bool usesFirstLaneOnly(const VPValue *Op) const override {
2441 "Op must be an operand of the recipe");
2442 // The recipe creates its own wide start value, so it only requests the
2443 // first lane of the operand.
2444 // TODO: Remove once creating the start value is modeled separately.
2445 return Op == getStartValue() || Op == getStepValue();
2446 }
2447};
2448
2449/// A recipe for handling phi nodes of integer and floating-point inductions,
2450/// producing their vector values. This is an abstract recipe and must be
2451/// converted to concrete recipes before executing.
2453 public VPIRFlags {
2454 TruncInst *Trunc;
2455
2456 // If this recipe is unrolled it will have 2 additional operands.
2457 bool isUnrolled() const { return getNumOperands() == 5; }
2458
2459public:
2461 VPValue *VF, const InductionDescriptor &IndDesc,
2462 const VPIRFlags &Flags, DebugLoc DL)
2463 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2464 Start, Step, IndDesc, DL),
2465 VPIRFlags(Flags), Trunc(nullptr) {
2466 addOperand(VF);
2467 }
2468
2470 VPValue *VF, const InductionDescriptor &IndDesc,
2471 TruncInst *Trunc, const VPIRFlags &Flags,
2472 DebugLoc DL)
2473 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2474 Start, Step, IndDesc, DL),
2475 VPIRFlags(Flags), Trunc(Trunc) {
2476 addOperand(VF);
2478 (void)Metadata;
2479 if (Trunc)
2481 assert(Metadata.empty() && "unexpected metadata on Trunc");
2482 }
2483
2485
2491
2492 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2493
2494 void execute(VPTransformState &State) override {
2495 llvm_unreachable("cannot execute this recipe, should be expanded via "
2496 "expandVPWidenIntOrFpInductionRecipe");
2497 }
2498
2499 /// Returns the start value of the induction.
2501
2502 /// If the recipe has been unrolled, return the VPValue for the induction
2503 /// increment, otherwise return null.
2505 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2506 }
2507
2508 /// Returns the number of incoming values, also number of incoming blocks.
2509 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2510 /// incoming value, its start value.
2511 unsigned getNumIncoming() const override { return 1; }
2512
2513 /// Returns the first defined value as TruncInst, if it is one or nullptr
2514 /// otherwise.
2515 TruncInst *getTruncInst() { return Trunc; }
2516 const TruncInst *getTruncInst() const { return Trunc; }
2517
2518 /// Returns true if the induction is canonical, i.e. starting at 0 and
2519 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2520 /// same type as the canonical induction.
2521 bool isCanonical() const;
2522
2523 /// Returns the scalar type of the induction.
2525 return Trunc ? Trunc->getType() : getStartValue()->getType();
2526 }
2527
2528 /// Returns the VPValue representing the value of this induction at
2529 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2530 /// take place.
2532 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2533 }
2534
2535protected:
2536#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2537 /// Print the recipe.
2538 void printRecipe(raw_ostream &O, const Twine &Indent,
2539 VPSlotTracker &SlotTracker) const override;
2540#endif
2541};
2542
2544public:
2545 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2546 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2547 /// VF*UF.
2549 VPValue *NumUnrolledElems,
2550 const InductionDescriptor &IndDesc, DebugLoc DL)
2551 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2552 Start, Step, IndDesc, DL) {
2553 addOperand(NumUnrolledElems);
2554 }
2555
2557
2563
2564 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2565
2566 /// Generate vector values for the pointer induction.
2567 void execute(VPTransformState &State) override {
2568 llvm_unreachable("cannot execute this recipe, should be expanded via "
2569 "expandVPWidenPointerInduction");
2570 };
2571
2572 /// Returns true if only scalar values will be generated.
2573 bool onlyScalarsGenerated(bool IsScalable);
2574
2575protected:
2576#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2577 /// Print the recipe.
2578 void printRecipe(raw_ostream &O, const Twine &Indent,
2579 VPSlotTracker &SlotTracker) const override;
2580#endif
2581};
2582
2583/// A recipe for widened phis. Incoming values are operands of the recipe and
2584/// their operand index corresponds to the incoming predecessor block. If the
2585/// recipe is placed in an entry block to a (non-replicate) region, it must have
2586/// exactly 2 incoming values, the first from the predecessor of the region and
2587/// the second from the exiting block of the region.
2589 public VPPhiAccessors {
2590 /// Name to use for the generated IR instruction for the widened phi.
2591 std::string Name;
2592
2593public:
2594 /// Create a new VPWidenPHIRecipe with incoming values \p IncomingvValues,
2595 /// debug location \p DL and \p Name.
2597 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2598 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, IncomingValues, DL),
2599 Name(Name.str()) {}
2600
2602 return new VPWidenPHIRecipe(operands(), getDebugLoc(), Name);
2603 }
2604
2605 ~VPWidenPHIRecipe() override = default;
2606
2607 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2608
2609 /// Generate the phi/select nodes.
2610 void execute(VPTransformState &State) override;
2611
2612 /// Return the cost of this VPWidenPHIRecipe.
2614 VPCostContext &Ctx) const override;
2615
2616protected:
2617#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2618 /// Print the recipe.
2619 void printRecipe(raw_ostream &O, const Twine &Indent,
2620 VPSlotTracker &SlotTracker) const override;
2621#endif
2622
2623 const VPRecipeBase *getAsRecipe() const override { return this; }
2624};
2625
2626/// A recipe for handling first-order recurrence phis. The start value is the
2627/// first operand of the recipe and the incoming value from the backedge is the
2628/// second operand.
2631 VPValue &BackedgeValue)
2632 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2633 &Start) {
2634 addOperand(&BackedgeValue);
2635 }
2636
2637 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2638
2643
2644 void execute(VPTransformState &State) override;
2645
2646 /// Return the cost of this first-order recurrence phi recipe.
2648 VPCostContext &Ctx) const override;
2649
2650 /// Returns true if the recipe only uses the first lane of operand \p Op.
2651 bool usesFirstLaneOnly(const VPValue *Op) const override {
2653 "Op must be an operand of the recipe");
2654 return Op == getStartValue();
2655 }
2656
2657protected:
2658#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2659 /// Print the recipe.
2660 void printRecipe(raw_ostream &O, const Twine &Indent,
2661 VPSlotTracker &SlotTracker) const override;
2662#endif
2663};
2664
2665/// Possible variants of a reduction.
2666
2667/// This reduction is ordered and in-loop.
2668struct RdxOrdered {};
2669/// This reduction is in-loop.
2670struct RdxInLoop {};
2671/// This reduction is unordered with the partial result scaled down by some
2672/// factor.
2675};
2676using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2677
2678inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2679 unsigned ScaleFactor) {
2680 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2681 if (Ordered)
2682 return RdxOrdered{};
2683 if (InLoop)
2684 return RdxInLoop{};
2685 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2686}
2687
2688/// A recipe for handling reduction phis. The start value is the first operand
2689/// of the recipe and the incoming value from the backedge is the second
2690/// operand.
2692 /// The recurrence kind of the reduction.
2693 const RecurKind Kind;
2694
2695 ReductionStyle Style;
2696
2697 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2698 /// patterns for argmin/argmax).
2699 /// TODO: Also support cases where the phi itself has a single use, but its
2700 /// compare has multiple uses.
2701 bool HasUsesOutsideReductionChain;
2702
2703public:
2704 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2706 VPValue &BackedgeValue, ReductionStyle Style,
2707 const VPIRFlags &Flags,
2708 bool HasUsesOutsideReductionChain = false)
2709 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2710 VPIRFlags(Flags), Kind(Kind), Style(Style),
2711 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2712 addOperand(&BackedgeValue);
2713 }
2714
2715 ~VPReductionPHIRecipe() override = default;
2716
2718 return new VPReductionPHIRecipe(
2720 *getOperand(0), *getBackedgeValue(), Style, *this,
2721 HasUsesOutsideReductionChain);
2722 }
2723
2724 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2725
2726 /// Generate the phi/select nodes.
2727 void execute(VPTransformState &State) override;
2728
2729 /// Get the factor that the VF of this recipe's output should be scaled by, or
2730 /// 1 if it isn't scaled.
2731 unsigned getVFScaleFactor() const {
2732 auto *Partial = std::get_if<RdxUnordered>(&Style);
2733 return Partial ? Partial->VFScaleFactor : 1;
2734 }
2735
2736 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2737 /// > 1.
2738 void setVFScaleFactor(unsigned ScaleFactor) {
2739 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2740 Style = RdxUnordered{ScaleFactor};
2741 }
2742
2743 /// Returns the number of incoming values, also number of incoming blocks.
 2744 /// Note that this reduction phi has two incoming values: its start value
 2745 /// and the incoming value from the loop backedge.
2746 unsigned getNumIncoming() const override { return 2; }
2747
2748 /// Returns the recurrence kind of the reduction.
2749 RecurKind getRecurrenceKind() const { return Kind; }
2750
2751 /// Returns true, if the phi is part of an ordered reduction.
2752 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2753
2754 /// Returns true if the phi is part of an in-loop reduction.
2755 bool isInLoop() const {
2756 return std::holds_alternative<RdxInLoop>(Style) ||
2757 std::holds_alternative<RdxOrdered>(Style);
2758 }
2759
2760 /// Returns true if the reduction outputs a vector with a scaled down VF.
2761 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2762
2763 /// Returns true, if the phi is part of a multi-use reduction.
2765 return HasUsesOutsideReductionChain;
2766 }
2767
2768 /// Returns true if the recipe only uses the first lane of operand \p Op.
2769 bool usesFirstLaneOnly(const VPValue *Op) const override {
2771 "Op must be an operand of the recipe");
2772 return isOrdered() || isInLoop();
2773 }
2774
2775protected:
2776#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2777 /// Print the recipe.
2778 void printRecipe(raw_ostream &O, const Twine &Indent,
2779 VPSlotTracker &SlotTracker) const override;
2780#endif
2781};
2782
2783/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2784/// instructions.
2786public:
2787 /// The blend operation is a User of the incoming values and of their
2788 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2789 /// be omitted (implied by passing an odd number of operands) in which case
2790 /// all other incoming values are merged into it.
2792 const VPIRFlags &Flags, DebugLoc DL)
2793 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands, Flags, DL) {
2794 assert(Operands.size() >= 2 && "Expected at least two operands!");
2795 setUnderlyingValue(Phi);
2796 }
2797
2798 VPBlendRecipe *clone() override {
2800 operands(), *this, getDebugLoc());
2801 }
2802
2803 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2804
2805 /// A normalized blend is one that has an odd number of operands, whereby the
2806 /// first operand does not have an associated mask.
2807 bool isNormalized() const { return getNumOperands() % 2; }
2808
2809 /// Return the number of incoming values, taking into account when normalized
2810 /// the first incoming value will have no mask.
2811 unsigned getNumIncomingValues() const {
2812 return (getNumOperands() + isNormalized()) / 2;
2813 }
2814
2815 /// Return incoming value number \p Idx.
2816 VPValue *getIncomingValue(unsigned Idx) const {
2817 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2818 }
2819
2820 /// Return mask number \p Idx.
2821 VPValue *getMask(unsigned Idx) const {
2822 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2823 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2824 }
2825
2826 /// Set mask number \p Idx to \p V.
2827 void setMask(unsigned Idx, VPValue *V) {
2828 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2829 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2830 }
2831
2832 void execute(VPTransformState &State) override {
2833 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2834 }
2835
 2836 /// Return the cost of this VPBlendRecipe.
2837 InstructionCost computeCost(ElementCount VF,
2838 VPCostContext &Ctx) const override;
2839
2840 /// Returns true if the recipe only uses the first lane of operand \p Op.
2841 bool usesFirstLaneOnly(const VPValue *Op) const override;
2842
2843protected:
2844#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2845 /// Print the recipe.
2846 void printRecipe(raw_ostream &O, const Twine &Indent,
2847 VPSlotTracker &SlotTracker) const override;
2848#endif
2849};
2850
2851/// A common base class for interleaved memory operations.
2852/// An Interleaved memory operation is a memory access method that combines
2853/// multiple strided loads/stores into a single wide load/store with shuffles.
2854/// The first operand is the start address. The optional operands are, in order,
2855/// the stored values and the mask.
2857 public VPIRMetadata {
2859
2860 /// Indicates if the interleave group is in a conditional block and requires a
2861 /// mask.
2862 bool HasMask = false;
2863
2864 /// Indicates if gaps between members of the group need to be masked out or if
 2865 /// unused gaps can be loaded speculatively.
2866 bool NeedsMaskForGaps = false;
2867
2868protected:
2869 VPInterleaveBase(const unsigned char SC,
2871 ArrayRef<VPValue *> Operands,
2872 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2873 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2874 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2875 NeedsMaskForGaps(NeedsMaskForGaps) {
2876 // TODO: extend the masked interleaved-group support to reversed access.
2877 assert((!Mask || !IG->isReverse()) &&
2878 "Reversed masked interleave-group not supported.");
2879 if (StoredValues.empty()) {
2880 for (Instruction *Inst : IG->members()) {
2881 assert(!Inst->getType()->isVoidTy() && "must have result");
2882 new VPRecipeValue(this, Inst);
2883 }
2884 } else {
2885 for (auto *SV : StoredValues)
2886 addOperand(SV);
2887 }
2888 if (Mask) {
2889 HasMask = true;
2890 addOperand(Mask);
2891 }
2892 }
2893
2894public:
2895 VPInterleaveBase *clone() override = 0;
2896
2897 static inline bool classof(const VPRecipeBase *R) {
2898 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
2899 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
2900 }
2901
2902 static inline bool classof(const VPUser *U) {
2903 auto *R = dyn_cast<VPRecipeBase>(U);
2904 return R && classof(R);
2905 }
2906
2907 /// Return the address accessed by this recipe.
2908 VPValue *getAddr() const {
2909 return getOperand(0); // Address is the 1st, mandatory operand.
2910 }
2911
2912 /// Return the mask used by this recipe. Note that a full mask is represented
2913 /// by a nullptr.
2914 VPValue *getMask() const {
2915 // Mask is optional and the last operand.
2916 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2917 }
2918
2919 /// Return true if the access needs a mask because of the gaps.
2920 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2921
2923
2924 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2925
2926 void execute(VPTransformState &State) override {
2927 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2928 }
2929
2930 /// Return the cost of this recipe.
2931 InstructionCost computeCost(ElementCount VF,
2932 VPCostContext &Ctx) const override;
2933
2934 /// Returns true if the recipe only uses the first lane of operand \p Op.
2935 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2936
2937 /// Returns the number of stored operands of this interleave group. Returns 0
2938 /// for load interleave groups.
2939 virtual unsigned getNumStoreOperands() const = 0;
2940
2941 /// Return the VPValues stored by this interleave group. If it is a load
2942 /// interleave group, return an empty ArrayRef.
2944 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
2946 }
2947};
2948
2949/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2950/// or stores into one wide load/store and shuffles. The first operand of a
2951/// VPInterleave recipe is the address, followed by the stored values, followed
2952/// by an optional mask.
// NOTE(review): doc line 2953 (the class declaration, presumably
// `class VPInterleaveRecipe final : public VPInterleaveBase {`) was lost in
// extraction -- confirm against upstream VPlan.h.
2954public:
 /// Forwards all arguments to VPInterleaveBase with the VPInterleaveSC id.
// NOTE(review): the first line of this constructor's signature (doc line
// 2955, presumably the InterleaveGroup and Addr parameters) was lost in
// extraction.
2956 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2957 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2958 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
2959 Mask, NeedsMaskForGaps, MD, DL) {}
2960
2961 ~VPInterleaveRecipe() override = default;
2962
// NOTE(review): doc lines 2963-2965 (the clone() declaration and the first
// arguments of the `new VPInterleaveRecipe(...)` expression) were lost in
// extraction; only the trailing argument lines remain below.
2966 needsMaskForGaps(), *this, getDebugLoc());
2967 }
2968
2969 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
2970
2971 /// Generate the wide load or store, and shuffles.
2972 void execute(VPTransformState &State) override;
2973
 /// Only the address operand demands a single lane, and only when it is not
 /// also one of the stored values.
2974 bool usesFirstLaneOnly(const VPValue *Op) const override {
// NOTE(review): the first line of the assert (doc line 2975, presumably
// `assert(is_contained(operands(), Op) &&`) was lost in extraction.
2976 "Op must be an operand of the recipe");
2977 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2978 }
2979
 /// Operands are {Addr, StoredValues..., [Mask]}, so subtract the address and
 /// optional mask from the operand count.
2980 unsigned getNumStoreOperands() const override {
2981 return getNumOperands() - (getMask() ? 2 : 1);
2982 }
2983
2984protected:
2985#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2986 /// Print the recipe.
2987 void printRecipe(raw_ostream &O, const Twine &Indent,
2988 VPSlotTracker &SlotTracker) const override;
2989#endif
2990};
2991
2992/// A recipe for interleaved memory operations with vector-predication
2993/// intrinsics. The first operand is the address, the second operand is the
2994/// explicit vector length. Stored values and mask are optional operands.
// NOTE(review): doc line 2995 (the class declaration, presumably
// `class VPInterleaveEVLRecipe final : public VPInterleaveBase {`) was lost in
// extraction -- confirm against upstream VPlan.h.
2996public:
 /// Build an EVL-based variant from an existing VPInterleaveRecipe \p R,
 /// inserting \p EVL as the second operand after the address.
// NOTE(review): the constructor's signature (doc line 2997, presumably
// `VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)`)
// was lost in extraction.
2998 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
2999 R.getInterleaveGroup(), {R.getAddr(), &EVL},
3000 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
3001 R.getDebugLoc()) {
3002 assert(!getInterleaveGroup()->isReverse() &&
3003 "Reversed interleave-group with tail folding is not supported.");
3004 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
3005 "supported for scalable vector.");
3006 }
3007
3008 ~VPInterleaveEVLRecipe() override = default;
3009
// NOTE(review): the clone() declaration (doc line 3010) was lost in
// extraction; only the unreachable body remains below.
3011 llvm_unreachable("cloning not implemented yet");
3012 }
3013
3014 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3015
3016 /// The VPValue of the explicit vector length.
3017 VPValue *getEVL() const { return getOperand(1); }
3018
3019 /// Generate the wide load or store, and shuffles.
3020 void execute(VPTransformState &State) override;
3021
3022 /// The recipe only uses the first lane of the address, and EVL operand.
3023 bool usesFirstLaneOnly(const VPValue *Op) const override {
// NOTE(review): the first line of the assert (doc line 3024, presumably
// `assert(is_contained(operands(), Op) &&`) was lost in extraction.
3025 "Op must be an operand of the recipe");
3026 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3027 Op == getEVL();
3028 }
3029
 /// Operands are {Addr, EVL, StoredValues..., [Mask]}, so subtract the
 /// address, EVL and optional mask from the operand count.
3030 unsigned getNumStoreOperands() const override {
3031 return getNumOperands() - (getMask() ? 3 : 2);
3032 }
3033
3034protected:
3035#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3036 /// Print the recipe.
3037 void printRecipe(raw_ostream &O, const Twine &Indent,
3038 VPSlotTracker &SlotTracker) const override;
3039#endif
3040};
3041
3042/// A recipe to represent inloop, ordered or partial reduction operations. It
3043/// performs a reduction on a vector operand into a scalar (vector in the case
3044/// of a partial reduction) value, and adds the result to a chain. The Operands
3045/// are {ChainOp, VecOp, [Condition]}.
// NOTE(review): doc line 3046 (the class declaration, presumably
// `class VPReductionRecipe : public VPRecipeWithIRFlags {`) was lost in
// extraction -- confirm against upstream VPlan.h.
3047
3048 /// The recurrence kind for the reduction in question.
3049 RecurKind RdxKind;
3050 /// Whether the reduction is conditional.
3051 bool IsConditional = false;
 /// Reduction style (a variant distinguishing at least RdxInLoop, RdxOrdered
 /// and RdxUnordered, per the accessors below).
3052 ReductionStyle Style;
3053
3054protected:
 /// Shared constructor: appends \p CondOp as a trailing operand when present
 /// and records whether the reduction is conditional.
3055 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
// NOTE(review): the parameter line at doc line 3056 was lost in extraction;
// presumably the FastMathFlags `FMFs` and `Instruction *I` parameters used in
// the initializer list / lost statement below -- confirm.
3057 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3058 ReductionStyle Style, DebugLoc DL)
3059 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
3060 Style(Style) {
3061 if (CondOp) {
3062 IsConditional = true;
3063 addOperand(CondOp);
3064 }
// NOTE(review): doc line 3065 lost its content; presumably
// `setUnderlyingValue(I);` given the `I`/`nullptr` arguments passed by the
// public constructors below -- confirm.
3066 }
3067
3068public:
// NOTE(review): the first signature line of this public constructor (doc line
// 3069) and its trailing defaulted parameters (doc line 3071) were lost in
// extraction.
3070 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3072 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3073 {ChainOp, VecOp}, CondOp, Style, DL) {}
3074
// NOTE(review): likewise doc lines 3075 and 3077 of this overload (which
// forwards a null underlying instruction) were lost in extraction.
3076 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3078 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3079 {ChainOp, VecOp}, CondOp, Style, DL) {}
3080
3081 ~VPReductionRecipe() override = default;
3082
// NOTE(review): the clone() declaration (doc line 3083) and the argument line
// at doc line 3085 were lost in extraction; only part of the `new
// VPReductionRecipe(...)` expression remains.
3084 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
3086 getCondOp(), Style, getDebugLoc());
3087 }
3088
 /// Support for isa/dyn_cast: both the plain and the EVL reduction recipe ids
 /// belong to this class.
3089 static inline bool classof(const VPRecipeBase *R) {
3090 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3091 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3092 }
3093
3094 static inline bool classof(const VPUser *U) {
3095 auto *R = dyn_cast<VPRecipeBase>(U);
3096 return R && classof(R);
3097 }
3098
3099 static inline bool classof(const VPValue *VPV) {
3100 const VPRecipeBase *R = VPV->getDefiningRecipe();
3101 return R && classof(R);
3102 }
3103
3104 static inline bool classof(const VPSingleDefRecipe *R) {
3105 return classof(static_cast<const VPRecipeBase *>(R));
3106 }
3107
3108 /// Generate the reduction in the loop.
3109 void execute(VPTransformState &State) override;
3110
3111 /// Return the cost of VPReductionRecipe.
3112 InstructionCost computeCost(ElementCount VF,
3113 VPCostContext &Ctx) const override;
3114
3115 /// Return the recurrence kind for the in-loop reduction.
3116 RecurKind getRecurrenceKind() const { return RdxKind; }
3117 /// Return true if the in-loop reduction is ordered.
3118 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
3119 /// Return true if the in-loop reduction is conditional.
3120 bool isConditional() const { return IsConditional; };
3121 /// Returns true if the reduction outputs a vector with a scaled down VF.
3122 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3123 /// Returns true if the reduction is in-loop.
3124 bool isInLoop() const {
3125 return std::holds_alternative<RdxInLoop>(Style) ||
3126 std::holds_alternative<RdxOrdered>(Style);
3127 }
3128 /// The VPValue of the scalar Chain being accumulated.
3129 VPValue *getChainOp() const { return getOperand(0); }
3130 /// The VPValue of the vector value to be reduced.
3131 VPValue *getVecOp() const { return getOperand(1); }
3132 /// The VPValue of the condition for the block.
// NOTE(review): the accessor's signature (doc line 3133, presumably
// `VPValue *getCondOp() const {`) was lost in extraction.
3134 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3135 }
3136 /// Get the factor that the VF of this recipe's output should be scaled by, or
3137 /// 1 if it isn't scaled.
3138 unsigned getVFScaleFactor() const {
3139 auto *Partial = std::get_if<RdxUnordered>(&Style);
3140 return Partial ? Partial->VFScaleFactor : 1;
3141 }
3142
3143protected:
3144#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3145 /// Print the recipe.
3146 void printRecipe(raw_ostream &O, const Twine &Indent,
3147 VPSlotTracker &SlotTracker) const override;
3148#endif
3149};
3150
3151/// A recipe to represent inloop reduction operations with vector-predication
3152/// intrinsics, performing a reduction on a vector operand with the explicit
3153/// vector length (EVL) into a scalar value, and adding the result to a chain.
3154/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
// NOTE(review): doc line 3155 (the class declaration, presumably
// `class VPReductionEVLRecipe : public VPReductionRecipe {`) was lost in
// extraction -- confirm against upstream VPlan.h.
3156public:
 /// Build an EVL-based variant from an existing VPReductionRecipe \p R,
 /// inserting \p EVL as the third operand; always in-loop, never partial.
// NOTE(review): the constructor's signature (doc lines 3157-3158) and the
// argument at doc line 3161 (presumably the underlying instruction taken from
// \p R) were lost in extraction.
3159 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3160 R.getFastMathFlags(),
3162 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3163 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3164 DL) {}
3165
3166 ~VPReductionEVLRecipe() override = default;
3167
// NOTE(review): the clone() declaration (doc line 3168) was lost in
// extraction; only the unreachable body remains below.
3169 llvm_unreachable("cloning not implemented yet");
3170 }
3171
3172 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3173
3174 /// Generate the reduction in the loop
3175 void execute(VPTransformState &State) override;
3176
3177 /// The VPValue of the explicit vector length.
3178 VPValue *getEVL() const { return getOperand(2); }
3179
3180 /// Returns true if the recipe only uses the first lane of operand \p Op.
3181 bool usesFirstLaneOnly(const VPValue *Op) const override {
// NOTE(review): the first line of the assert (doc line 3182, presumably
// `assert(is_contained(operands(), Op) &&`) was lost in extraction.
3183 "Op must be an operand of the recipe");
3184 return Op == getEVL();
3185 }
3186
3187protected:
3188#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3189 /// Print the recipe.
3190 void printRecipe(raw_ostream &O, const Twine &Indent,
3191 VPSlotTracker &SlotTracker) const override;
3192#endif
3193};
3194
3195/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3196/// copies of the original scalar type, one per lane, instead of producing a
3197/// single copy of widened type for all lanes. If the instruction is known to be
3198/// a single scalar, only one copy will be generated.
// NOTE(review): doc line 3199 (the class declaration, presumably
// `class VPReplicateRecipe : public VPRecipeWithIRFlags,`) was lost in
// extraction -- confirm against upstream VPlan.h.
3200 public VPIRMetadata {
3201 /// Indicator if only a single replica per lane is needed.
3202 bool IsSingleScalar;
3203
3204 /// Indicator if the replicas are also predicated.
3205 bool IsPredicated;
3206
3207public:
 /// Construct a replicating recipe for \p I; the optional \p Mask, when
 /// non-null, is appended as the last operand and marks the recipe predicated.
// NOTE(review): the first signature line of this constructor (doc line 3208,
// presumably `VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands,`)
// was lost in extraction.
3209 bool IsSingleScalar, VPValue *Mask = nullptr,
3210 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3211 DebugLoc DL = DebugLoc::getUnknown())
3212 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands, Flags, DL),
3213 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3214 IsPredicated(Mask) {
3215 setUnderlyingValue(I);
3216 if (Mask)
3217 addOperand(Mask);
3218 }
3219
3220 ~VPReplicateRecipe() override = default;
3221
// NOTE(review): the clone() declaration (doc line 3222, presumably
// `VPReplicateRecipe *clone() override {`) was lost in extraction.
3223 auto *Copy = new VPReplicateRecipe(
3224 getUnderlyingInstr(), operands(), IsSingleScalar,
3225 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3226 Copy->transferFlags(*this);
3227 return Copy;
3228 }
3229
3230 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3231
3232 /// Generate replicas of the desired Ingredient. Replicas will be generated
3233 /// for all parts and lanes unless a specific part and lane are specified in
3234 /// the \p State.
3235 void execute(VPTransformState &State) override;
3236
3237 /// Return the cost of this VPReplicateRecipe.
3238 InstructionCost computeCost(ElementCount VF,
3239 VPCostContext &Ctx) const override;
3240
3241 /// Return the cost of scalarizing a call to \p CalledFn with argument
3242 /// operands \p ArgOps for a given \p VF.
3243 static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy,
// NOTE(review): the parameter line at doc line 3244 (presumably the `ArgOps`
// array parameter documented above) was lost in extraction.
3245 bool IsSingleScalar, ElementCount VF,
3246 VPCostContext &Ctx);
3247
3248 bool isSingleScalar() const { return IsSingleScalar; }
3249
3250 bool isPredicated() const { return IsPredicated; }
3251
3252 /// Returns true if the recipe only uses the first lane of operand \p Op.
3253 bool usesFirstLaneOnly(const VPValue *Op) const override {
// NOTE(review): the first line of the assert (doc line 3254, presumably
// `assert(is_contained(operands(), Op) &&`) was lost in extraction.
3255 "Op must be an operand of the recipe");
3256 return isSingleScalar();
3257 }
3258
3259 /// Returns true if the recipe uses scalars of operand \p Op.
3260 bool usesScalars(const VPValue *Op) const override {
// NOTE(review): the first line of the assert (doc line 3261) was lost in
// extraction.
3262 "Op must be an operand of the recipe");
3263 return true;
3264 }
3265
3266 /// Return the mask of a predicated VPReplicateRecipe.
// NOTE(review): the accessor's signature (doc line 3267, presumably
// `VPValue *getMask() const {`) was lost in extraction.
3268 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3269 return getOperand(getNumOperands() - 1);
3270 }
3271
 /// Opcode of the replicated instruction.
3272 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3273
3274protected:
3275#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3276 /// Print the recipe.
3277 void printRecipe(raw_ostream &O, const Twine &Indent,
3278 VPSlotTracker &SlotTracker) const override;
3279#endif
3280};
3281
3282/// A recipe for generating conditional branches on the bits of a mask.
// NOTE(review): doc line 3283 (the class declaration, presumably
// `class VPBranchOnMaskRecipe : public VPRecipeBase {`) was lost in
// extraction -- confirm against upstream VPlan.h.
3284public:
 /// Construct a branch-on-mask recipe with \p BlockInMask as its only operand.
// NOTE(review): the constructor's signature (doc line 3285, presumably
// `VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)`) was lost in
// extraction.
3286 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3287
// NOTE(review): doc lines 3288-3289 (the clone() declaration and its `return
// new ...` body) were lost in extraction; only the closing brace remains.
3290 }
3291
3292 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3293
3294 /// Generate the extraction of the appropriate bit from the block mask and the
3295 /// conditional branch.
3296 void execute(VPTransformState &State) override;
3297
3298 /// Return the cost of this VPBranchOnMaskRecipe.
3299 InstructionCost computeCost(ElementCount VF,
3300 VPCostContext &Ctx) const override;
3301
3302#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3303 /// Print the recipe.
3304 void printRecipe(raw_ostream &O, const Twine &Indent,
3305 VPSlotTracker &SlotTracker) const override {
3306 O << Indent << "BRANCH-ON-MASK ";
// NOTE(review): doc line 3307 (presumably the statement printing the mask
// operand via the slot tracker) was lost in extraction.
3308 }
3309#endif
3310
3311 /// Returns true if the recipe uses scalars of operand \p Op.
3312 bool usesScalars(const VPValue *Op) const override {
// NOTE(review): the first line of the assert (doc line 3313) was lost in
// extraction.
3314 "Op must be an operand of the recipe");
3315 return true;
3316 }
3317};
3318
3319/// A recipe to combine multiple recipes into a single 'expression' recipe,
3320/// which should be considered a single entity for cost-modeling and transforms.
3321/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3322/// expression recipes, before execute. The individual expression recipes are
3323/// completely disconnected from the def-use graph of other recipes not part of
3324/// the expression. Def-use edges between pairs of expression recipes remain
3325/// intact, whereas every edge between an expression recipe and a recipe outside
3326/// the expression is elevated to connect the non-expression recipe with the
3327/// VPExpressionRecipe itself.
3328class VPExpressionRecipe : public VPSingleDefRecipe {
3329 /// Recipes included in this VPExpressionRecipe. This could contain
3330 /// duplicates.
3331 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3332
3333 /// Temporary VPValues used for external operands of the expression, i.e.
3334 /// operands not defined by recipes in the expression.
3335 SmallVector<VPValue *> LiveInPlaceholders;
3336
3337 enum class ExpressionTypes {
3338 /// Represents an inloop extended reduction operation, performing a
3339 /// reduction on an extended vector operand into a scalar value, and adding
3340 /// the result to a chain.
3341 ExtendedReduction,
3342 /// Represent an inloop multiply-accumulate reduction, multiplying the
3343 /// extended vector operands, performing a reduction.add on the result, and
3344 /// adding the scalar result to a chain.
3345 ExtMulAccReduction,
3346 /// Represent an inloop multiply-accumulate reduction, multiplying the
3347 /// vector operands, performing a reduction.add on the result, and adding
3348 /// the scalar result to a chain.
3349 MulAccReduction,
3350 /// Represent an inloop multiply-accumulate reduction, multiplying the
3351 /// extended vector operands, negating the multiplication, performing a
3352 /// reduction.add on the result, and adding the scalar result to a chain.
3353 ExtNegatedMulAccReduction,
3354 };
3355
3356 /// Type of the expression.
3357 ExpressionTypes ExpressionType;
3358
3359 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3360 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3361 /// in the expression) are replaced by temporary VPValues and the original
3362 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3363 /// as needed (excluding last) to ensure they are only used by other recipes
3364 /// in the expression.
3365 VPExpressionRecipe(ExpressionTypes ExpressionType,
3366 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3367
3368public:
// NOTE(review): the public constructor signatures at doc lines 3369, 3371,
// 3373-3374 and 3377-3378 (taking the widen-cast/mul recipes named in the
// forwarded initializer lists below) were lost in extraction -- confirm
// against upstream VPlan.h.
3370 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3372 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3375 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3376 {Ext0, Ext1, Mul, Red}) {}
3379 VPReductionRecipe *Red)
3380 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3381 {Ext0, Ext1, Mul, Sub, Red}) {
3382 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3383 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3384 "Expected an add reduction");
3385 assert(getNumOperands() >= 3 && "Expected at least three operands");
3386 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3387 assert(SubConst && SubConst->isZero() &&
3388 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3389 }
3390
 /// Deletes the owned expression recipes (each once, despite possible
 /// duplicates) and the temporary live-in placeholder values.
// NOTE(review): the destructor's declaration (doc line 3391, presumably
// `~VPExpressionRecipe() override {`) was lost in extraction.
3392 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3393 for (auto *R : reverse(ExpressionRecipes)) {
3394 if (ExpressionRecipesSeen.insert(R).second)
3395 delete R;
3396 }
3397 for (VPValue *T : LiveInPlaceholders)
3398 delete T;
3399 }
3400
3401 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3402
3403 VPExpressionRecipe *clone() override {
3404 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3405 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3406 for (auto *R : ExpressionRecipes)
3407 NewExpressiondRecipes.push_back(R->clone());
3408 for (auto *New : NewExpressiondRecipes) {
3409 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3410 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3411 // Update placeholder operands in the cloned recipe to use the external
3412 // operands, to be internalized when the cloned expression is constructed.
3413 for (const auto &[Placeholder, OutsideOp] :
3414 zip(LiveInPlaceholders, operands()))
3415 New->replaceUsesOfWith(Placeholder, OutsideOp);
3416 }
3417 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3418 }
3419
3420 /// Return the VPValue to use to infer the result type of the recipe.
// NOTE(review): the accessor's signature (doc line 3421, presumably
// `VPValue *getOperandOfResultType() const {`) was lost in extraction.
3422 unsigned OpIdx =
3423 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3424 : 1;
3425 return getOperand(getNumOperands() - OpIdx);
3426 }
3427
3428 /// Insert the recipes of the expression back into the VPlan, directly before
3429 /// the current recipe. Leaves the expression recipe empty, which must be
3430 /// removed before codegen.
3431 void decompose();
3432
 /// VF scale factor of the trailing reduction recipe, or 1 when the last
 /// recipe is not a reduction.
3433 unsigned getVFScaleFactor() const {
3434 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3435 return PR ? PR->getVFScaleFactor() : 1;
3436 }
3437
3438 /// Method for generating code, must not be called as this recipe is abstract.
3439 void execute(VPTransformState &State) override {
3440 llvm_unreachable("recipe must be removed before execute");
3441 }
3442
 /// Return the cost of this expression as a single entity.
// NOTE(review): the first line of the computeCost declaration (doc line 3443)
// was lost in extraction.
3444 VPCostContext &Ctx) const override;
3445
3446 /// Returns true if this expression contains recipes that may read from or
3447 /// write to memory.
3448 bool mayReadOrWriteMemory() const;
3449
3450 /// Returns true if this expression contains recipes that may have side
3451 /// effects.
3452 bool mayHaveSideEffects() const;
3453
3454 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3455 bool isSingleScalar() const;
3456
3457protected:
3458#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3459 /// Print the recipe.
3460 void printRecipe(raw_ostream &O, const Twine &Indent,
3461 VPSlotTracker &SlotTracker) const override;
3462#endif
3463};
3464
3465/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3466/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3467/// order to merge values that are set under such a branch and feed their uses.
3468/// The phi nodes can be scalar or vector depending on the users of the value.
3469/// This recipe works in concert with VPBranchOnMaskRecipe.
// NOTE(review): doc line 3470 (the class declaration, presumably
// `class VPPredInstPHIRecipe : public VPSingleDefRecipe {`) was lost in
// extraction -- confirm against upstream VPlan.h.
3471public:
3472 /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
3473 /// nodes after merging back from a Branch-on-Mask.
// NOTE(review): the constructor's signature (doc line 3474, presumably
// `VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)`) was lost in extraction.
3475 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV, DL) {}
3476 ~VPPredInstPHIRecipe() override = default;
3477
// NOTE(review): the clone() declaration (doc line 3478) was lost in
// extraction; only its body remains below.
3479 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3480 }
3481
3482 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3483
3484 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3485 /// retain SSA form.
3486 void execute(VPTransformState &State) override;
3487
3488 /// Return the cost of this VPPredInstPHIRecipe.
// NOTE(review): the first line of the computeCost declaration (doc line 3489)
// was lost in extraction.
3490 VPCostContext &Ctx) const override {
3491 // TODO: Compute accurate cost after retiring the legacy cost model.
3492 return 0;
3493 }
3494
3495protected:
3496#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3497 /// Print the recipe.
3498 void printRecipe(raw_ostream &O, const Twine &Indent,
3499 VPSlotTracker &SlotTracker) const override;
3500#endif
3501};
3502
3503/// A common mixin class for widening memory operations. An optional mask can be
3504/// provided as the last operand.
// NOTE(review): doc line 3505 (the class declaration, presumably
// `class VPWidenMemoryRecipe {`, a mixin with no recipe base of its own given
// the getAsRecipe() indirection below) was lost in extraction -- confirm.
3506protected:
// NOTE(review): doc line 3507 lost its content; presumably the reference to
// the underlying IR memory instruction (the "Ingredient" returned by the lost
// accessor near doc line 3561) -- confirm.
3508
3509 /// Alignment information for this memory access.
// NOTE(review): doc line 3510 (presumably `Align Alignment;`, per getAlign()
// below) was lost in extraction.
3511
3512 /// Whether the accessed addresses are consecutive.
// NOTE(review): doc line 3513 (presumably `bool Consecutive;`, per
// isConsecutive() below) was lost in extraction.
3514
3515 /// Whether the memory access is masked.
3516 bool IsMasked = false;
3517
 /// Append \p Mask (if any) as the last operand of the concrete recipe and
 /// record that the access is masked. Must be called at most once.
3518 void setMask(VPValue *Mask) {
3519 assert(!IsMasked && "cannot re-set mask");
3520 if (!Mask)
3521 return;
3522 getAsRecipe()->addOperand(Mask);
3523 IsMasked = true;
3524 }
3525
// NOTE(review): doc lines 3526-3530 (the protected constructor of this mixin)
// were lost in extraction; concrete recipes below invoke it as
// `VPWidenMemoryRecipe(Ingredient, Consecutive, Metadata)` -- confirm the
// exact signature against upstream VPlan.h.
3530
3531public:
3532 virtual ~VPWidenMemoryRecipe() = default;
3533
3534 /// Return a VPRecipeBase* to the current object.
// NOTE(review): doc line 3535 (the non-const `getAsRecipe()` pure-virtual
// declaration, paired with the const overload below) was lost in extraction.
3536 virtual const VPRecipeBase *getAsRecipe() const = 0;
3537
3538 /// Return whether the loaded-from / stored-to addresses are consecutive.
3539 bool isConsecutive() const { return Consecutive; }
3540
3541 /// Return the address accessed by this recipe.
3542 VPValue *getAddr() const { return getAsRecipe()->getOperand(0); }
3543
3544 /// Returns true if the recipe is masked.
3545 bool isMasked() const { return IsMasked; }
3546
3547 /// Return the mask used by this recipe. Note that a full mask is represented
3548 /// by a nullptr.
3549 VPValue *getMask() const {
3550 // Mask is optional and therefore the last operand.
3551 const VPRecipeBase *R = getAsRecipe();
3552 return isMasked() ? R->getOperand(R->getNumOperands() - 1) : nullptr;
3553 }
3554
3555 /// Returns the alignment of the memory access.
3556 Align getAlign() const { return Alignment; }
3557
3558 /// Return the cost of this VPWidenMemoryRecipe.
3559 InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
3560
// NOTE(review): doc line 3561 lost its content; presumably the accessor for
// the underlying IR instruction (`getIngredient()`, used by the EVL recipes
// below) -- confirm.
3562};
3563
3564/// A recipe for widening load operations, using the address to load from and an
3565/// optional mask.
// NOTE(review): doc line 3566 (the class declaration, presumably
// `struct VPWidenLoadRecipe final : public VPSingleDefRecipe,`) was lost in
// extraction -- confirm against upstream VPlan.h.
3567 public VPWidenMemoryRecipe {
 /// Construct a widened load of \p Load from \p Addr; \p Mask, if non-null,
 /// is appended as the last operand via setMask().
// NOTE(review): the first signature line of this constructor (doc line 3568,
// presumably `VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,`)
// was lost in extraction.
3569 bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
3570 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadSC, {Addr}, &Load, DL),
3571 VPWidenMemoryRecipe(Load, Consecutive, Metadata) {
3572 setMask(Mask);
3573 }
3574
// NOTE(review): doc lines 3575-3576 (the clone() declaration and the start of
// the `new VPWidenLoadRecipe(...)` expression) were lost in extraction; only
// the trailing argument line remains below.
3577 getMask(), Consecutive, *this, getDebugLoc());
3578 }
3579
3580 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3581
3582 /// Generate a wide load or gather.
3583 void execute(VPTransformState &State) override;
3584
3585 /// Return the cost of this VPWidenLoadRecipe.
// NOTE(review): the first line of the computeCost declaration (doc line 3586)
// was lost in extraction.
3587 VPCostContext &Ctx) const override {
3588 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3589 }
3590
3591 /// Returns true if the recipe only uses the first lane of operand \p Op.
3592 bool usesFirstLaneOnly(const VPValue *Op) const override {
// NOTE(review): the first line of the assert (doc line 3593) was lost in
// extraction.
3594 "Op must be an operand of the recipe");
3595 // Widened, consecutive loads operations only demand the first lane of
3596 // their address.
3597 return Op == getAddr() && isConsecutive();
3598 }
3599
3600protected:
 // Resolve the VPWidenMemoryRecipe mixin's recipe indirection to this object.
3601 VPRecipeBase *getAsRecipe() override { return this; }
3602 const VPRecipeBase *getAsRecipe() const override { return this; }
3603
3604#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3605 /// Print the recipe.
3606 void printRecipe(raw_ostream &O, const Twine &Indent,
3607 VPSlotTracker &SlotTracker) const override;
3608#endif
3609};
3610
3611/// A recipe for widening load operations with vector-predication intrinsics,
3612/// using the address to load from, the explicit vector length and an optional
3613/// mask.
// NOTE(review): doc line 3614 (the class declaration, presumably
// `struct VPWidenLoadEVLRecipe final : public VPSingleDefRecipe,`) was lost in
// extraction -- confirm against upstream VPlan.h.
3615 public VPWidenMemoryRecipe {
 /// Build an EVL-based variant from an existing widened load \p L, with
 /// operands {Addr, EVL} plus the optional mask.
// NOTE(review): the first signature line of this constructor (doc line 3616,
// presumably `VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr,
// VPValue &EVL,`) was lost in extraction.
3617 VPValue *Mask)
3618 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadEVLSC, {Addr, &EVL},
3619 &L.getIngredient(), L.getDebugLoc()),
3620 VPWidenMemoryRecipe(L.getIngredient(), L.isConsecutive(), L) {
3621 setMask(Mask);
3622 }
3623
// NOTE(review): the clone() declaration (doc line 3624) was lost in
// extraction; only the unreachable body remains below.
3625 llvm_unreachable("cloning not supported");
3626 }
3627
3628 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3629
3630 /// Return the EVL operand.
3631 VPValue *getEVL() const { return getOperand(1); }
3632
3633 /// Generate the wide load or gather.
3634 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3635
3636 /// Return the cost of this VPWidenLoadEVLRecipe.
// NOTE(review): the first line of this declaration (doc line 3637, presumably
// `LLVM_ABI_FOR_TEST InstructionCost`) was lost in extraction.
3638 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3639
3640 /// Returns true if the recipe only uses the first lane of operand \p Op.
3641 bool usesFirstLaneOnly(const VPValue *Op) const override {
// NOTE(review): the first line of the assert (doc line 3642) was lost in
// extraction.
3643 "Op must be an operand of the recipe");
3644 // Widened loads only demand the first lane of EVL and consecutive loads
3645 // only demand the first lane of their address.
3646 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3647 }
3648
3649protected:
 // Resolve the VPWidenMemoryRecipe mixin's recipe indirection to this object.
3650 VPRecipeBase *getAsRecipe() override { return this; }
3651 const VPRecipeBase *getAsRecipe() const override { return this; }
3652
3653#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3654 /// Print the recipe.
3655 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3656 VPSlotTracker &SlotTracker) const override;
3657#endif
3658};
3659
3660/// A recipe for widening store operations, using the stored value, the address
3661/// to store to and an optional mask.
// NOTE(review): doc line 3662 (the class declaration, presumably
// `struct VPWidenStoreRecipe final : public VPRecipeBase,`) was lost in
// extraction -- confirm against upstream VPlan.h.
3663 public VPWidenMemoryRecipe {
 /// Construct a widened store of \p StoredVal to \p Addr; \p Mask, if
 /// non-null, is appended as the last operand via setMask().
3664 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3665 VPValue *Mask, bool Consecutive,
3666 const VPIRMetadata &Metadata, DebugLoc DL)
3667 : VPRecipeBase(VPRecipeBase::VPWidenStoreSC, {Addr, StoredVal}, DL),
3668 VPWidenMemoryRecipe(Store, Consecutive, Metadata) {
3669 setMask(Mask);
3670 }
3671
// NOTE(review): doc lines 3672-3674 (the clone() declaration and the start of
// the `new VPWidenStoreRecipe(...)` expression) were lost in extraction; only
// the trailing argument line remains below.
3675 *this, getDebugLoc());
3676 }
3677
3678 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3679
3680 /// Return the value stored by this recipe.
3681 VPValue *getStoredValue() const { return getOperand(1); }
3682
3683 /// Generate a wide store or scatter.
3684 void execute(VPTransformState &State) override;
3685
3686 /// Return the cost of this VPWidenStoreRecipe.
// NOTE(review): the first line of the computeCost declaration (doc line 3687)
// was lost in extraction.
3688 VPCostContext &Ctx) const override {
3689 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3690 }
3691
3692 /// Returns true if the recipe only uses the first lane of operand \p Op.
3693 bool usesFirstLaneOnly(const VPValue *Op) const override {
// NOTE(review): the first line of the assert (doc line 3694) was lost in
// extraction.
3695 "Op must be an operand of the recipe");
3696 // Widened, consecutive stores only demand the first lane of their address,
3697 // unless the same operand is also stored.
3698 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3699 }
3700
3701protected:
 // Resolve the VPWidenMemoryRecipe mixin's recipe indirection to this object.
3702 VPRecipeBase *getAsRecipe() override { return this; }
3703 const VPRecipeBase *getAsRecipe() const override { return this; }
3704
3705#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3706 /// Print the recipe.
3707 void printRecipe(raw_ostream &O, const Twine &Indent,
3708 VPSlotTracker &SlotTracker) const override;
3709#endif
3710};
3711
3712/// A recipe for widening store operations with vector-predication intrinsics,
3713/// using the value to store, the address to store to, the explicit vector
3714/// length and an optional mask.
// NOTE(review): doc line 3715 (the class declaration, presumably
// `struct VPWidenStoreEVLRecipe final : public VPRecipeBase,`) was lost in
// extraction -- confirm against upstream VPlan.h.
3716 public VPWidenMemoryRecipe {
 /// Build an EVL-based variant from an existing widened store \p S, with
 /// operands {Addr, StoredVal, EVL} plus the optional mask.
// NOTE(review): the first signature line of this constructor (doc line 3717,
// presumably `VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr,`)
// was lost in extraction.
3718 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3719 : VPRecipeBase(VPRecipeBase::VPWidenStoreEVLSC, {Addr, StoredVal, &EVL},
3720 S.getDebugLoc()),
3721 VPWidenMemoryRecipe(S.getIngredient(), S.isConsecutive(), S) {
3722 setMask(Mask);
3723 }
3724
// NOTE(review): the clone() declaration (doc line 3725) was lost in
// extraction; only the unreachable body remains below.
3726 llvm_unreachable("cloning not supported");
3727 }
3728
3729 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3730
3731 /// Return the address accessed by this recipe.
3732 VPValue *getStoredValue() const { return getOperand(1); }
3733
3734 /// Return the EVL operand.
3735 VPValue *getEVL() const { return getOperand(2); }
3736
3737 /// Generate the wide store or scatter.
3738 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3739
3740 /// Return the cost of this VPWidenStoreEVLRecipe.
// NOTE(review): the first line of this declaration (doc line 3741, presumably
// `LLVM_ABI_FOR_TEST InstructionCost`) was lost in extraction.
3742 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3743
3744 /// Returns true if the recipe only uses the first lane of operand \p Op.
3745 bool usesFirstLaneOnly(const VPValue *Op) const override {
// NOTE(review): the first line of the assert (doc line 3746) was lost in
// extraction.
3747 "Op must be an operand of the recipe");
3748 if (Op == getEVL()) {
3749 assert(getStoredValue() != Op && "unexpected store of EVL");
3750 return true;
3751 }
3752 // Widened, consecutive memory operations only demand the first lane of
3753 // their address, unless the same operand is also stored. That latter can
3754 // happen with opaque pointers.
3755 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3756 }
3757
3758protected:
 // Resolve the VPWidenMemoryRecipe mixin's recipe indirection to this object.
3759 VPRecipeBase *getAsRecipe() override { return this; }
3760 const VPRecipeBase *getAsRecipe() const override { return this; }
3761
3762#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3763 /// Print the recipe.
3764 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3765 VPSlotTracker &SlotTracker) const override;
3766#endif
3767};
3768
3769/// Recipe to expand a SCEV expression.
// NOTE(review): doc line 3770 (the class declaration, presumably
// `class VPExpandSCEVRecipe : public VPSingleDefRecipe {`) was lost in
// extraction -- confirm against upstream VPlan.h.
 /// The SCEV expression to expand; expansion happens in a transform, not in
 /// execute() (see the unreachable below).
3771 const SCEV *Expr;
3772
3773public:
// NOTE(review): the constructor's signature (doc line 3774, presumably
// `VPExpandSCEVRecipe(const SCEV *Expr)`) was lost in extraction.
3775 : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, {}), Expr(Expr) {}
3776
3777 ~VPExpandSCEVRecipe() override = default;
3778
3779 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3780
3781 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
3782
3783 void execute(VPTransformState &State) override {
3784 llvm_unreachable("SCEV expressions must be expanded before final execute");
3785 }
3786
3787 /// Return the cost of this VPExpandSCEVRecipe.
// NOTE(review): the first line of the computeCost declaration (doc line 3788)
// was lost in extraction.
3789 VPCostContext &Ctx) const override {
3790 // TODO: Compute accurate cost after retiring the legacy cost model.
3791 return 0;
3792 }
3793
 /// Return the SCEV expression this recipe expands.
3794 const SCEV *getSCEV() const { return Expr; }
3795
3796protected:
3797#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3798 /// Print the recipe.
3799 void printRecipe(raw_ostream &O, const Twine &Indent,
3800 VPSlotTracker &SlotTracker) const override;
3801#endif
3802};
3803
3804/// A recipe for generating the active lane mask for the vector loop that is
3805/// used to predicate the vector operations.
// NOTE(review): doc line 3806 (the class declaration, presumably
// `class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {`) was lost in
// extraction -- confirm against upstream VPlan.h.
3807public:
// NOTE(review): the constructor's signature (doc line 3808, presumably
// `VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)`) was lost in
// extraction.
3809 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
3810 StartMask, DL) {}
3811
3812 ~VPActiveLaneMaskPHIRecipe() override = default;
3813
 /// Clone the phi; a second (backedge) operand, if already added, is copied
 /// over as well.
// NOTE(review): doc lines 3814-3815 (the clone() declaration and the `new
// VPActiveLaneMaskPHIRecipe(...)` statement assigning `R`) were lost in
// extraction.
3816 if (getNumOperands() == 2)
3817 R->addOperand(getOperand(1));
3818 return R;
3819 }
3820
3821 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
3822
3823 /// Generate the active lane mask phi of the vector loop.
3824 void execute(VPTransformState &State) override;
3825
3826protected:
3827#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3828 /// Print the recipe.
3829 void printRecipe(raw_ostream &O, const Twine &Indent,
3830 VPSlotTracker &SlotTracker) const override;
3831#endif
3832};
3833
3834/// A recipe for generating the phi node tracking the current scalar iteration
3835/// index. It starts at the start value of the canonical induction and gets
3836/// incremented by the number of scalar iterations processed by the vector loop
3837/// iteration. The increment does not have to be loop invariant.
// NOTE(review): doc line 3838 (the class declaration, presumably
// `class VPCurrentIterationPHIRecipe : public VPHeaderPHIRecipe {`) was lost
// in extraction -- confirm against upstream VPlan.h.
3839public:
// NOTE(review): the constructor's signature (doc line 3840, presumably
// `VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)`) was lost in
// extraction.
3841 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
3842 StartIV, DL) {}
3843
3844 ~VPCurrentIterationPHIRecipe() override = default;
3845
// NOTE(review): the clone() declaration (doc line 3846) was lost in
// extraction; only the unreachable body remains below.
3847 llvm_unreachable("cloning not implemented yet");
3848 }
3849
3850 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
3851
 /// Must be lowered to a scalar phi recipe before code generation.
3852 void execute(VPTransformState &State) override {
3853 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3854 "scalar phi recipe");
3855 }
3856
3857 /// Return the cost of this VPCurrentIterationPHIRecipe.
// NOTE(review): the first line of the computeCost declaration (doc line 3858)
// was lost in extraction.
3859 VPCostContext &Ctx) const override {
3860 // For now, match the behavior of the legacy cost model.
3861 return 0;
3862 }
3863
3864 /// Returns true if the recipe only uses the first lane of operand \p Op.
3865 bool usesFirstLaneOnly(const VPValue *Op) const override {
// NOTE(review): the first line of the assert (doc line 3866) was lost in
// extraction.
3867 "Op must be an operand of the recipe");
3868 return true;
3869 }
3870
3871protected:
3872#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3873 /// Print the recipe.
3874 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3875 VPSlotTracker &SlotTracker) const override;
3876#endif
3877};
3878
3879/// A Recipe for widening the canonical induction variable of the vector loop.
3881 public VPUnrollPartAccessor<1> {
3882public:
3884 : VPSingleDefRecipe(VPRecipeBase::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3885
3886 ~VPWidenCanonicalIVRecipe() override = default;
3887
3891
3892 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
3893
3894 /// Generate a canonical vector induction variable of the vector loop, with
3895 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3896 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3897 void execute(VPTransformState &State) override;
3898
3899 /// Return the cost of this VPWidenCanonicalIVPHIRecipe.
3901 VPCostContext &Ctx) const override {
3902 // TODO: Compute accurate cost after retiring the legacy cost model.
3903 return 0;
3904 }
3905
3906 /// Return the canonical IV being widened.
3910
3911protected:
3912#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3913 /// Print the recipe.
3914 void printRecipe(raw_ostream &O, const Twine &Indent,
3915 VPSlotTracker &SlotTracker) const override;
3916#endif
3917};
3918
3919/// A recipe for converting the input value \p IV value to the corresponding
3920/// value of an IV with different start and step values, using Start + IV *
3921/// Step.
3923 /// Kind of the induction.
3925 /// If not nullptr, the floating point induction binary operator. Must be set
3926 /// for floating point inductions.
3927 const FPMathOperator *FPBinOp;
3928
3929public:
3931 VPValue *CanonicalIV, VPValue *Step)
3933 IndDesc.getKind(),
3934 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3935 Start, CanonicalIV, Step) {}
3936
3938 const FPMathOperator *FPBinOp, VPIRValue *Start,
3939 VPValue *IV, VPValue *Step)
3940 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step}),
3941 Kind(Kind), FPBinOp(FPBinOp) {}
3942
3943 ~VPDerivedIVRecipe() override = default;
3944
3946 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3947 getStepValue());
3948 }
3949
3950 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
3951
3952 void execute(VPTransformState &State) override {
3953 llvm_unreachable("Expected prior expansion of this recipe");
3954 }
3955
3956 /// Return the cost of this VPDerivedIVRecipe.
3958 VPCostContext &Ctx) const override {
3959 // TODO: Compute accurate cost after retiring the legacy cost model.
3960 return 0;
3961 }
3962
3963 Type *getScalarType() const { return getStartValue()->getType(); }
3964
3966 VPValue *getIndex() const { return getOperand(1); }
3967 VPValue *getStepValue() const { return getOperand(2); }
3968 const FPMathOperator *getFPBinOp() const { return FPBinOp; }
3970
3971 /// Returns true if the recipe only uses the first lane of operand \p Op.
3972 bool usesFirstLaneOnly(const VPValue *Op) const override {
3974 "Op must be an operand of the recipe");
3975 return true;
3976 }
3977
3978protected:
3979#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3980 /// Print the recipe.
3981 void printRecipe(raw_ostream &O, const Twine &Indent,
3982 VPSlotTracker &SlotTracker) const override;
3983#endif
3984};
3985
3986/// A recipe for handling phi nodes of integer and floating-point inductions,
3987/// producing their scalar values. Before unrolling by UF the recipe represents
3988/// the VF*UF scalar values to be produced, or UF scalar values if only first
3989/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
3990/// operand StartIndex to all unroll parts except part 0, as the recipe
3991/// represents the VF scalar values (this number of values is taken from
3992/// State.VF rather than from the VF operand) starting at IV + StartIndex.
3994 Instruction::BinaryOps InductionOpcode;
3995
3996public:
3999 DebugLoc DL)
4000 : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
4001 FMFs, DL),
4002 InductionOpcode(Opcode) {}
4003
4005 VPValue *Step, VPValue *VF,
4008 IV, Step, VF, IndDesc.getInductionOpcode(),
4009 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
4010 ? IndDesc.getInductionBinOp()->getFastMathFlags()
4011 : FastMathFlags(),
4012 DL) {}
4013
4014 ~VPScalarIVStepsRecipe() override = default;
4015
4017 auto *NewR = new VPScalarIVStepsRecipe(getOperand(0), getOperand(1),
4018 getOperand(2), InductionOpcode,
4020 if (VPValue *StartIndex = getStartIndex())
4021 NewR->setStartIndex(StartIndex);
4022 return NewR;
4023 }
4024
4025 VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4026
4027 /// Generate the scalarized versions of the phi node as needed by their users.
4028 void execute(VPTransformState &State) override;
4029
4030 /// Return the cost of this VPScalarIVStepsRecipe.
4032 VPCostContext &Ctx) const override {
4033 // TODO: Compute accurate cost after retiring the legacy cost model.
4034 return 0;
4035 }
4036
4037 VPValue *getStepValue() const { return getOperand(1); }
4038
4039 /// Return the number of scalars to produce per unroll part, used to compute
4040 /// StartIndex during unrolling.
4041 VPValue *getVFValue() const { return getOperand(2); }
4042
4043 /// Return the StartIndex, or null if known to be zero, valid only after
4044 /// unrolling.
4046 return getNumOperands() == 4 ? getOperand(3) : nullptr;
4047 }
4048
4049 /// Set or add the StartIndex operand.
4050 void setStartIndex(VPValue *StartIndex) {
4051 if (getNumOperands() == 4)
4052 setOperand(3, StartIndex);
4053 else
4054 addOperand(StartIndex);
4055 }
4056
4057 /// Returns true if the recipe only uses the first lane of operand \p Op.
4058 bool usesFirstLaneOnly(const VPValue *Op) const override {
4060 "Op must be an operand of the recipe");
4061 return true;
4062 }
4063
4064 Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4065
4066protected:
4067#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4068 /// Print the recipe.
4069 void printRecipe(raw_ostream &O, const Twine &Indent,
4070 VPSlotTracker &SlotTracker) const override;
4071#endif
4072};
4073
4074/// CastInfo helper for casting from VPRecipeBase to a mixin class that is not
4075/// part of the VPRecipeBase class hierarchy (e.g. VPPhiAccessors,
4076/// VPIRMetadata).
4077namespace vpdetail {
4078template <typename VPMixin, typename... RecipeTys>
4080 : public DefaultDoCastIfPossible<VPMixin *, VPRecipeBase *,
4081 CastInfoMixinImpl<VPMixin, RecipeTys...>> {
4082 static_assert((std::is_base_of_v<VPMixin, RecipeTys> && ...),
4083 "Each type in RecipeTys must derive from VPMixin");
4084
4085 /// Used by isa.
4086 static bool isPossible(VPRecipeBase *R) { return isa<RecipeTys...>(R); }
4087
4088 /// Used by cast.
4089 static VPMixin *doCast(VPRecipeBase *R) {
4090 VPMixin *Out = nullptr;
4091 ((Out = dyn_cast<RecipeTys>(R)) || ...);
4092 assert(Out && "Illegal recipe for cast");
4093 return Out;
4094 }
4095 static VPMixin *castFailed() { return nullptr; }
4096};
4097} // namespace vpdetail
4098
4099/// Support casting from VPRecipeBase -> VPPhiAccessors.
4100template <>
4104
4105template <>
4110template <>
4112 : public ForwardToPointerCast<VPPhiAccessors, VPRecipeBase *,
4113 CastInfo<VPPhiAccessors, VPRecipeBase *>> {};
4114
4115/// Support casting from VPRecipeBase / VPUser -> VPWidenMemoryRecipe.
4116template <>
4121template <>
4126
4127/// Support casting from VPRecipeBase -> VPIRMetadata.
4128template <>
4134
4135template <>
4140template <>
4142 : public ForwardToPointerCast<VPIRMetadata, VPRecipeBase *,
4143 CastInfo<VPIRMetadata, VPRecipeBase *>> {};
4144
4145/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4146/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4147/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4148class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4149 friend class VPlan;
4150
4151 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4152 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4153 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4154 if (Recipe)
4155 appendRecipe(Recipe);
4156 }
4157
4158public:
4160
4161protected:
4162 /// The VPRecipes held in the order of output instructions to generate.
4164
4165 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4166 : VPBlockBase(BlockSC, Name.str()) {}
4167
4168public:
4169 ~VPBasicBlock() override {
4170 while (!Recipes.empty())
4171 Recipes.pop_back();
4172 }
4173
4174 /// Instruction iterators...
4179
4180 //===--------------------------------------------------------------------===//
4181 /// Recipe iterator methods
4182 ///
4183 inline iterator begin() { return Recipes.begin(); }
4184 inline const_iterator begin() const { return Recipes.begin(); }
4185 inline iterator end() { return Recipes.end(); }
4186 inline const_iterator end() const { return Recipes.end(); }
4187
4188 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4189 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4190 inline reverse_iterator rend() { return Recipes.rend(); }
4191 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4192
4193 inline size_t size() const { return Recipes.size(); }
4194 inline bool empty() const { return Recipes.empty(); }
4195 inline const VPRecipeBase &front() const { return Recipes.front(); }
4196 inline VPRecipeBase &front() { return Recipes.front(); }
4197 inline const VPRecipeBase &back() const { return Recipes.back(); }
4198 inline VPRecipeBase &back() { return Recipes.back(); }
4199
4200 /// Returns a reference to the list of recipes.
4202
4203 /// Returns a pointer to a member of the recipe list.
4204 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4205 return &VPBasicBlock::Recipes;
4206 }
4207
4208 /// Method to support type inquiry through isa, cast, and dyn_cast.
4209 static inline bool classof(const VPBlockBase *V) {
4210 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4211 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4212 }
4213
4214 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4215 assert(Recipe && "No recipe to append.");
4216 assert(!Recipe->Parent && "Recipe already in VPlan");
4217 Recipe->Parent = this;
4218 Recipes.insert(InsertPt, Recipe);
4219 }
4220
4221 /// Augment the existing recipes of a VPBasicBlock with an additional
4222 /// \p Recipe as the last recipe.
4223 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4224
4225 /// The method which generates the output IR instructions that correspond to
4226 /// this VPBasicBlock, thereby "executing" the VPlan.
4227 void execute(VPTransformState *State) override;
4228
4229 /// Return the cost of this VPBasicBlock.
4230 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4231
4232 /// Return the position of the first non-phi node recipe in the block.
4233 iterator getFirstNonPhi();
4234
4235 /// Returns an iterator range over the PHI-like recipes in the block.
4239
4240 /// Split current block at \p SplitAt by inserting a new block between the
4241 /// current block and its successors and moving all recipes starting at
4242 /// SplitAt to the new block. Returns the new block.
4243 VPBasicBlock *splitAt(iterator SplitAt);
4244
4245 VPRegionBlock *getEnclosingLoopRegion();
4246 const VPRegionBlock *getEnclosingLoopRegion() const;
4247
4248#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4249 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4250 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4251 ///
4252 /// Note that the numbering is applied to the whole VPlan, so printing
4253 /// individual blocks is consistent with the whole VPlan printing.
4254 void print(raw_ostream &O, const Twine &Indent,
4255 VPSlotTracker &SlotTracker) const override;
4256 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4257#endif
4258
4259 /// If the block has multiple successors, return the branch recipe terminating
4260 /// the block. If there are no or only a single successor, return nullptr;
4261 VPRecipeBase *getTerminator();
4262 const VPRecipeBase *getTerminator() const;
4263
4264 /// Returns true if the block is exiting its parent region.
4265 bool isExiting() const;
4266
4267 /// Clone the current block and its recipes, without updating the operands of
4268 /// the cloned recipes.
4269 VPBasicBlock *clone() override;
4270
4271 /// Returns the predecessor block at index \p Idx with the predecessors as per
4272 /// the corresponding plain CFG. If the block is an entry block to a region,
4273 /// the first predecessor is the single predecessor of a region, and the
4274 /// second predecessor is the exiting block of the region.
4275 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4276
4277protected:
4278 /// Execute the recipes in the IR basic block \p BB.
4279 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4280
4281 /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
4282 /// generated for this VPBB.
4283 void connectToPredecessors(VPTransformState &State);
4284
4285private:
4286 /// Create an IR BasicBlock to hold the output instructions generated by this
4287 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4288 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4289};
4290
4291inline const VPBasicBlock *
4293 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4294}
4295
4296/// A special type of VPBasicBlock that wraps an existing IR basic block.
4297/// Recipes of the block get added before the first non-phi instruction in the
4298/// wrapped block.
4299/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4300/// preheader block.
4301class VPIRBasicBlock : public VPBasicBlock {
4302 friend class VPlan;
4303
4304 BasicBlock *IRBB;
4305
4306 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
4307 VPIRBasicBlock(BasicBlock *IRBB)
4308 : VPBasicBlock(VPIRBasicBlockSC,
4309 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4310 IRBB(IRBB) {}
4311
4312public:
4313 ~VPIRBasicBlock() override = default;
4314
4315 static inline bool classof(const VPBlockBase *V) {
4316 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4317 }
4318
4319 /// The method which generates the output IR instructions that correspond to
4320 /// this VPBasicBlock, thereby "executing" the VPlan.
4321 void execute(VPTransformState *State) override;
4322
4323 VPIRBasicBlock *clone() override;
4324
4325 BasicBlock *getIRBasicBlock() const { return IRBB; }
4326};
4327
4328/// Track information about the canonical IV value of a region.
4329/// TODO: Have it also track the canonical IV increment, subject of NUW flag.
4331 /// VPRegionValue for the canonical IV, whose allocation is managed by
4332 /// VPCanonicalIVInfo.
4333 std::unique_ptr<VPRegionValue> CanIV;
4334
4335 /// Whether the increment of the canonical IV may unsigned wrap or not.
4336 bool HasNUW = true;
4337
4338public:
4340 : CanIV(std::make_unique<VPRegionValue>(Ty, DL, Region)) {}
4341
4342 VPRegionValue *getRegionValue() { return CanIV.get(); }
4343 const VPRegionValue *getRegionValue() const { return CanIV.get(); }
4344
4345 bool hasNUW() const { return HasNUW; }
4346
4347 void clearNUW() { HasNUW = false; }
4348};
4349
4350/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4351/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4352/// A VPRegionBlock may indicate that its contents are to be replicated several
4353/// times. This is designed to support predicated scalarization, in which a
4354/// scalar if-then code structure needs to be generated VF * UF times. Having
4355/// this replication indicator helps to keep a single model for multiple
4356/// candidate VF's. The actual replication takes place only once the desired VF
4357/// and UF have been determined.
4358class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4359 friend class VPlan;
4360
4361 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4362 VPBlockBase *Entry;
4363
4364 /// Hold the Single Exiting block of the SESE region modelled by the
4365 /// VPRegionBlock.
4366 VPBlockBase *Exiting;
4367
4368 /// Holds the Canonical IV of the loop region along with additional
4369 /// information. If CanIVInfo is nullptr, the region is a replicating region.
4370 /// Loop regions retain their canonical IVs until they are dissolved, even if
4371 /// the canonical IV has no users.
4372 std::unique_ptr<VPCanonicalIVInfo> CanIVInfo;
4373
4374 /// Use VPlan::createLoopRegion() and VPlan::createReplicateRegion() to create
4375 /// VPRegionBlocks.
4376 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4377 const std::string &Name = "")
4378 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) {
4379 if (Entry) {
4380 assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
4381 assert(Exiting && "Must also pass Exiting if Entry is passed.");
4382 assert(!Exiting->hasSuccessors() && "Exit block has successors.");
4383 Entry->setParent(this);
4384 Exiting->setParent(this);
4385 }
4386 }
4387
4388 VPRegionBlock(Type *CanIVTy, DebugLoc DL, VPBlockBase *Entry,
4389 VPBlockBase *Exiting, const std::string &Name = "")
4390 : VPRegionBlock(Entry, Exiting, Name) {
4391 CanIVInfo = std::make_unique<VPCanonicalIVInfo>(CanIVTy, DL, this);
4392 }
4393
4394public:
4395 ~VPRegionBlock() override = default;
4396
4397 /// Method to support type inquiry through isa, cast, and dyn_cast.
4398 static inline bool classof(const VPBlockBase *V) {
4399 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4400 }
4401
4402 const VPBlockBase *getEntry() const { return Entry; }
4403 VPBlockBase *getEntry() { return Entry; }
4404
4405 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4406 /// EntryBlock must have no predecessors.
4407 void setEntry(VPBlockBase *EntryBlock) {
4408 assert(!EntryBlock->hasPredecessors() &&
4409 "Entry block cannot have predecessors.");
4410 Entry = EntryBlock;
4411 EntryBlock->setParent(this);
4412 }
4413
4414 const VPBlockBase *getExiting() const { return Exiting; }
4415 VPBlockBase *getExiting() { return Exiting; }
4416
4417 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4418 /// ExitingBlock must have no successors.
4419 void setExiting(VPBlockBase *ExitingBlock) {
4420 assert(!ExitingBlock->hasSuccessors() &&
4421 "Exit block cannot have successors.");
4422 Exiting = ExitingBlock;
4423 ExitingBlock->setParent(this);
4424 }
4425
4426 /// Returns the pre-header VPBasicBlock of the loop region.
4428 assert(!isReplicator() && "should only get pre-header of loop regions");
4429 return getSinglePredecessor()->getExitingBasicBlock();
4430 }
4431
4432 /// An indicator whether this region is to generate multiple replicated
4433 /// instances of output IR corresponding to its VPBlockBases.
4434 bool isReplicator() const { return !CanIVInfo; }
4435
4436 /// The method which generates the output IR instructions that correspond to
4437 /// this VPRegionBlock, thereby "executing" the VPlan.
4438 void execute(VPTransformState *State) override;
4439
4440 // Return the cost of this region.
4441 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4442
4443#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4444 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4445 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4446 /// consecutive numbers.
4447 ///
4448 /// Note that the numbering is applied to the whole VPlan, so printing
4449 /// individual regions is consistent with the whole VPlan printing.
4450 void print(raw_ostream &O, const Twine &Indent,
4451 VPSlotTracker &SlotTracker) const override;
4452 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4453#endif
4454
4455 /// Clone all blocks in the single-entry single-exit region of the block and
4456 /// their recipes without updating the operands of the cloned recipes.
4457 VPRegionBlock *clone() override;
4458
4459 /// Remove the current region from its VPlan, connecting its predecessor to
4460 /// its entry, and its exiting block to its successor.
4461 void dissolveToCFGLoop();
4462
4463 /// Get the canonical IV increment instruction if it exists. Otherwise, create
4464 /// a new increment before the terminator and return it. The canonical IV
4465 /// increment is subject to DCE if unused, unlike the canonical IV itself.
4466 VPInstruction *getOrCreateCanonicalIVIncrement();
4467
4468 /// Return the canonical induction variable of the region, null for
4469 /// replicating regions.
4471 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4472 }
4474 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4475 }
4476
4477 /// Return the type of the canonical IV for loop regions.
4479 return CanIVInfo->getRegionValue()->getType();
4480 }
4481
4482 /// Indicates if NUW is set for the canonical IV increment, for loop regions.
4483 bool hasCanonicalIVNUW() const { return CanIVInfo->hasNUW(); }
4484
4485 /// Unsets NUW for the canonical IV increment \p Increment, for loop regions.
4487 assert(Increment && "Must provide increment to clear");
4488 Increment->dropPoisonGeneratingFlags();
4489 CanIVInfo->clearNUW();
4490 }
4491};
4492
4494 return getParent()->getParent();
4495}
4496
4498 return getParent()->getParent();
4499}
4500
4501/// VPlan models a candidate for vectorization, encoding various decisions taken
4502/// to produce efficient output IR, including which branches, basic-blocks and
4503/// output IR instructions to generate, and their cost. VPlan holds a
4504/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4505/// VPBasicBlock.
4506class VPlan {
4507 friend class VPlanPrinter;
4508 friend class VPSlotTracker;
4509
4510 /// VPBasicBlock corresponding to the original preheader. Used to place
4511 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4512 /// rest of VPlan execution.
4513 /// When this VPlan is used for the epilogue vector loop, the entry will be
4514 /// replaced by a new entry block created during skeleton creation.
4515 VPBasicBlock *Entry;
4516
4517 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4518 VPIRBasicBlock *ScalarHeader;
4519
4520 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4521 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4522 /// e.g. if the scalar epilogue always executes.
4524
4525 /// Holds the VFs applicable to this VPlan.
4527
4528 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4529 /// any UF.
4531
4532 /// Holds the name of the VPlan, for printing.
4533 std::string Name;
4534
4535 /// Represents the trip count of the original loop, for folding
4536 /// the tail.
4537 VPValue *TripCount = nullptr;
4538
4539 /// Represents the backedge taken count of the original loop, for folding
4540 /// the tail. It equals TripCount - 1.
4541 VPSymbolicValue *BackedgeTakenCount = nullptr;
4542
4543 /// Represents the vector trip count.
4544 VPSymbolicValue VectorTripCount;
4545
4546 /// Represents the vectorization factor of the loop.
4547 VPSymbolicValue VF;
4548
4549 /// Represents the unroll factor of the loop.
4550 VPSymbolicValue UF;
4551
4552 /// Represents the loop-invariant VF * UF of the vector loop region.
4553 VPSymbolicValue VFxUF;
4554
4555 /// Contains all the external definitions created for this VPlan, as a mapping
4556 /// from IR Values to VPIRValues.
4558
4559 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4560 /// VPlan is destroyed.
4561 SmallVector<VPBlockBase *> CreatedBlocks;
4562
4563 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4564 /// wrapping the original header of the scalar loop. The vector loop will have
4565 /// index type \p IdxTy.
4566 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader, Type *IdxTy)
4567 : Entry(Entry), ScalarHeader(ScalarHeader), VectorTripCount(IdxTy),
4568 VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4569 Entry->setPlan(this);
4570 assert(ScalarHeader->getNumSuccessors() == 0 &&
4571 "scalar header must be a leaf node");
4572 }
4573
4574public:
4575 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4576 /// original preheader and scalar header of \p L, to be used as entry and
4577 /// scalar header blocks of the new VPlan. The vector loop will have index
4578 /// type \p IdxTy.
4579 VPlan(Loop *L, Type *IdxTy);
4580
4581 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4582 /// wrapping \p ScalarHeaderBB and vector loop index of type \p IdxTy.
4583 VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
4584 : VectorTripCount(IdxTy), VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4585 setEntry(createVPBasicBlock("preheader"));
4586 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4587 }
4588
4590
4592 Entry = VPBB;
4593 VPBB->setPlan(this);
4594 }
4595
4596 /// Generate the IR code for this VPlan.
4597 void execute(VPTransformState *State);
4598
4599 /// Return the cost of this plan.
4601
4602 VPBasicBlock *getEntry() { return Entry; }
4603 const VPBasicBlock *getEntry() const { return Entry; }
4604
4605 /// Returns the preheader of the vector loop region, if one exists, or null
4606 /// otherwise.
4608 const VPRegionBlock *VectorRegion = getVectorLoopRegion();
4609 return VectorRegion
4610 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4611 : nullptr;
4612 }
4613
4614 /// Returns the VPRegionBlock of the vector loop.
4617
4618 /// Returns true if this VPlan is for an outer loop, i.e., its vector
4619 /// loop region contains a nested loop region.
4620 LLVM_ABI_FOR_TEST bool isOuterLoop() const;
4621
4622 /// Returns the 'middle' block of the plan, that is the block that selects
4623 /// whether to execute the scalar tail loop or the exit block from the loop
4624 /// latch. If there is an early exit from the vector loop, the middle block
4625 /// conceptully has the early exit block as third successor, split accross 2
4626 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4627 /// tail loop or the exit block. If the scalar tail loop or exit block are
4628 /// known to always execute, the middle block may branch directly to that
4629 /// block. This function cannot be called once the vector loop region has been
4630 /// removed.
4632 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4633 assert(
4634 LoopRegion &&
4635 "cannot call the function after vector loop region has been removed");
4636 // The middle block is always the last successor of the region.
4637 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4638 }
4639
4641 return const_cast<VPlan *>(this)->getMiddleBlock();
4642 }
4643
4644 /// Return the VPBasicBlock for the preheader of the scalar loop.
4647 getScalarHeader()->getSinglePredecessor());
4648 }
4649
4650 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4651 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4652
4653 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4654 /// the original scalar loop.
4655 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4656
4657 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4658 /// exit block.
4660
4661 /// Returns true if \p VPBB is an exit block.
4662 bool isExitBlock(VPBlockBase *VPBB);
4663
4664 /// The trip count of the original loop.
4666 assert(TripCount && "trip count needs to be set before accessing it");
4667 return TripCount;
4668 }
4669
4670 /// Set the trip count assuming it is currently null; if it is not - use
4671 /// resetTripCount().
4672 void setTripCount(VPValue *NewTripCount) {
4673 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4674 TripCount = NewTripCount;
4675 }
4676
4677 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4678 /// the original trip count have been replaced.
4679 void resetTripCount(VPValue *NewTripCount) {
4680 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4681 "TripCount must be set when resetting");
4682 TripCount = NewTripCount;
4683 }
4684
4685 /// The backedge taken count of the original loop.
4687 // BTC shares the canonical IV type with VectorTripCount.
4688 if (!BackedgeTakenCount)
4689 BackedgeTakenCount = new VPSymbolicValue(VectorTripCount.getType());
4690 return BackedgeTakenCount;
4691 }
4692 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4693
4694 /// The vector trip count.
4695 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4696
4697 /// Returns the VF of the vector loop region.
4698 VPSymbolicValue &getVF() { return VF; };
4699 const VPSymbolicValue &getVF() const { return VF; };
4700
4701 /// Returns the UF of the vector loop region.
4702 VPSymbolicValue &getUF() { return UF; };
4703
4704 /// Returns VF * UF of the vector loop region.
4705 VPSymbolicValue &getVFxUF() { return VFxUF; }
4706
4709 }
4710
4711 const DataLayout &getDataLayout() const {
4713 }
4714
4715 void addVF(ElementCount VF) { VFs.insert(VF); }
4716
4718 assert(hasVF(VF) && "Cannot set VF not already in plan");
4719 VFs.clear();
4720 VFs.insert(VF);
4721 }
4722
4723 /// Remove \p VF from the plan.
4725 assert(hasVF(VF) && "tried to remove VF not present in plan");
4726 VFs.remove(VF);
4727 }
4728
4729 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4730 bool hasScalableVF() const {
4731 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4732 }
4733
4734 /// Returns an iterator range over all VFs of the plan.
4737 return VFs;
4738 }
4739
4740 /// Returns the single VF of the plan, asserting that the plan has exactly
4741 /// one VF.
4743 assert(VFs.size() == 1 && "expected plan with single VF");
4744 return VFs[0];
4745 }
4746
4747 bool hasScalarVFOnly() const {
4748 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4749 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4750 "Plan with scalar VF should only have a single VF");
4751 return HasScalarVFOnly;
4752 }
4753
4754 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4755
4756 /// Returns the concrete UF of the plan, after unrolling.
4757 unsigned getConcreteUF() const {
4758 assert(UFs.size() == 1 && "Expected a single UF");
4759 return UFs[0];
4760 }
4761
4762 void setUF(unsigned UF) {
4763 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4764 UFs.clear();
4765 UFs.insert(UF);
4766 }
4767
4768 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4769 /// concrete UF.
4770 bool isUnrolled() const { return UFs.size() == 1; }
4771
4772 /// Return a string with the name of the plan and the applicable VFs and UFs.
4773 std::string getName() const;
4774
4775 void setName(const Twine &newName) { Name = newName.str(); }
4776
4777 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4778 /// yet) for \p V.
4780 assert(V && "Trying to get or add the VPIRValue of a null Value");
4781 auto [It, Inserted] = LiveIns.try_emplace(V);
4782 if (Inserted) {
4783 if (auto *CI = dyn_cast<ConstantInt>(V))
4784 It->second = new VPConstantInt(CI);
4785 else
4786 It->second = new VPIRValue(V);
4787 }
4788
4789 assert(isa<VPIRValue>(It->second) &&
4790 "Only VPIRValues should be in mapping");
4791 return It->second;
4792 }
4794 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4795 return getOrAddLiveIn(V->getValue());
4796 }
4797
4798 /// Return a VPIRValue wrapping i1 true.
4799 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4800
4801 /// Return a VPIRValue wrapping i1 false.
4802 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4803
4804 /// Return a VPIRValue wrapping the null value of type \p Ty.
4805 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
4806
4807 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
4809 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
4810 }
4811
4812 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
4813 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4814 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4815 }
4816
4817 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4818 /// value.
4820 bool IsSigned = false) {
4821 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4822 }
4823
4824 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4826 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4827 }
4828
4829 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4830 /// otherwise.
4831 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4832
4833 /// Return the list of live-in VPValues available in the VPlan.
4834 auto getLiveIns() const { return LiveIns.values(); }
4835
4836#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4837 /// Print the live-ins of this VPlan to \p O.
4838 void printLiveIns(raw_ostream &O) const;
4839
4840 /// Print this VPlan to \p O.
4841 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4842
4843 /// Print this VPlan in DOT format to \p O.
4844 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4845
4846 /// Dump the plan to stderr (for debugging).
4847 LLVM_DUMP_METHOD void dump() const;
4848#endif
4849
4850 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4851 /// recipes to refer to the clones, and return it.
4853
4854 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4855 /// present. The returned block is owned by the VPlan and deleted once the
4856 /// VPlan is destroyed.
4858 VPRecipeBase *Recipe = nullptr) {
4859 auto *VPB = new VPBasicBlock(Name, Recipe);
4860 CreatedBlocks.push_back(VPB);
4861 return VPB;
4862 }
4863
4864 /// Create a new loop region with a canonical IV using \p CanIVTy and
4865 /// \p DL. Use \p Name as the region's name and set entry and exiting blocks
4866 /// to \p Entry and \p Exiting respectively, if provided. The returned block
4867 /// is owned by the VPlan and deleted once the VPlan is destroyed.
4869 const std::string &Name = "",
4870 VPBlockBase *Entry = nullptr,
4871 VPBlockBase *Exiting = nullptr) {
4872 auto *VPB = new VPRegionBlock(CanIVTy, DL, Entry, Exiting, Name);
4873 CreatedBlocks.push_back(VPB);
4874 return VPB;
4875 }
4876
4877 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4878 /// returned block is owned by the VPlan and deleted once the VPlan is
4879 /// destroyed.
4881 const std::string &Name = "") {
4882 auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
4883 CreatedBlocks.push_back(VPB);
4884 return VPB;
4885 }
4886
4887 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4888 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4889 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4891
4892 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4893 /// instructions in \p IRBB, except its terminator which is managed by the
4894 /// successors of the block in VPlan. The returned block is owned by the VPlan
4895 /// and deleted once the VPlan is destroyed.
4897
4898 /// Returns true if the VPlan is based on a loop with an early exit. That is
4899 /// the case if the VPlan has either more than one exit block or a single exit
4900 /// block with multiple predecessors (one for the exit via the latch and one
4901 /// via the other early exit).
4902 bool hasEarlyExit() const {
4903 return count_if(ExitBlocks,
4904 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4905 1 ||
4906 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4907 }
4908
4909 /// Returns true if the scalar tail may execute after the vector loop, i.e.
4910 /// if the middle block is a predecessor of the scalar preheader. Note that
4911 /// this relies on unneeded branches to the scalar tail loop being removed.
4912 bool hasScalarTail() const {
4913 auto *ScalarPH = getScalarPreheader();
4914 return ScalarPH &&
4915 is_contained(ScalarPH->getPredecessors(), getMiddleBlock());
4916 }
4917
4918 /// The type of the canonical induction variable of the vector loop.
4919 Type *getIndexType() const { return VF.getType(); }
4920};
4921
4922#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4923inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4924 Plan.print(OS);
4925 return OS;
4926}
4927#endif
4928
4929} // end namespace llvm
4930
4931#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
#define LLVM_PACKED_START
Definition Compiler.h:554
dxil translate DXIL Translate Metadata
Hexagon Common GEP
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:586
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3814
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3808
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4148
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4176
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4223
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4178
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4175
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4201
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4159
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4165
iterator end()
Definition VPlan.h:4185
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4183
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4177
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4236
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:752
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:233
~VPBasicBlock() override
Definition VPlan.h:4169
const_reverse_iterator rbegin() const
Definition VPlan.h:4189
reverse_iterator rend()
Definition VPlan.h:4190
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4163
VPRecipeBase & back()
Definition VPlan.h:4198
const VPRecipeBase & front() const
Definition VPlan.h:4195
const_iterator begin() const
Definition VPlan.h:4184
VPRecipeBase & front()
Definition VPlan.h:4196
const VPRecipeBase & back() const
Definition VPlan.h:4197
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4214
bool empty() const
Definition VPlan.h:4194
const_iterator end() const
Definition VPlan.h:4186
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4209
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4204
reverse_iterator rbegin()
Definition VPlan.h:4188
friend class VPlan
Definition VPlan.h:4149
size_t size() const
Definition VPlan.h:4193
const_reverse_iterator rend() const
Definition VPlan.h:4191
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2816
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2821
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2791
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2811
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2832
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2798
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2827
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2807
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:93
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:314
VPRegionBlock * getParent()
Definition VPlan.h:185
VPBlocksTy & getPredecessors()
Definition VPlan.h:222
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:219
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:384
void setName(const Twine &newName)
Definition VPlan.h:178
size_t getNumSuccessors() const
Definition VPlan.h:236
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:218
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:216
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:336
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:650
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:172
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:272
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:349
size_t getNumPredecessors() const
Definition VPlan.h:237
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:305
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:225
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:342
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:214
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:221
virtual VPBlockBase * clone()=0
Clone the current block and it's recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:170
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:197
const VPRegionBlock * getParent() const
Definition VPlan.h:186
const std::string & getName() const
Definition VPlan.h:176
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:324
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:262
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:296
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:232
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that correspond to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:256
void clearPredecessors()
Remove all the predecessor of this block.
Definition VPlan.h:321
friend class VPBlockUtils
Definition VPlan.h:94
unsigned getVPBlockID() const
Definition VPlan.h:183
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:363
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:328
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:162
VPBlocksTy & getSuccessors()
Definition VPlan.h:211
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:217
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:285
void setParent(VPRegionBlock *P)
Definition VPlan.h:196
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:278
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:226
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:210
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3304
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3288
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3312
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3285
VPlan-based builder utility analogous to IRBuilder.
VPRegionValue * getRegionValue()
Definition VPlan.h:4342
VPCanonicalIVInfo(Type *Ty, DebugLoc DL, VPRegionBlock *Region)
Definition VPlan.h:4339
const VPRegionValue * getRegionValue() const
Definition VPlan.h:4343
bool hasNUW() const
Definition VPlan.h:4345
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3846
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3840
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:3858
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3852
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3865
~VPCurrentIterationPHIRecipe() override=default
InductionDescriptor::InductionKind getInductionKind() const
Definition VPlan.h:3969
VPValue * getIndex() const
Definition VPlan.h:3966
const FPMathOperator * getFPBinOp() const
Definition VPlan.h:3968
VPIRValue * getStartValue() const
Definition VPlan.h:3965
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3957
VPValue * getStepValue() const
Definition VPlan.h:3967
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3952
Type * getScalarType() const
Definition VPlan.h:3963
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3945
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step)
Definition VPlan.h:3937
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3972
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPValue *CanonicalIV, VPValue *Step)
Definition VPlan.h:3930
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3783
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3788
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3774
const SCEV * getSCEV() const
Definition VPlan.h:3794
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3779
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3439
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3421
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3403
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3391
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3377
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3369
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3373
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3433
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3371
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2305
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2307
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2311
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2324
static bool classof(const VPValue *V)
Definition VPlan.h:2321
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2347
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2352
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2336
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2344
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2317
VPValue * getStartValue() const
Definition VPlan.h:2339
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2356
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2062
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2079
unsigned getOpcode() const
Definition VPlan.h:2075
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2055
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4301
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:462
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4325
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4315
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4302
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:487
Class to record and manage LLVM IR flags.
Definition VPlan.h:685
FastMathFlagsTy FMFs
Definition VPlan.h:773
ReductionFlagsTy ReductionFlags
Definition VPlan.h:775
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
bool hasNoWrapFlags() const
Definition VPlan.h:1028
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:866
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:846
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:832
WrapFlagsTy WrapFlags
Definition VPlan.h:767
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:825
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:990
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
bool isReductionOrdered() const
Definition VPlan.h:1054
TruncFlagsTy TruncFlags
Definition VPlan.h:768
CmpInst::Predicate getPredicate() const
Definition VPlan.h:962
WrapFlagsTy getNoWrapFlags() const
Definition VPlan.h:1038
uint8_t AllFlags[2]
Definition VPlan.h:776
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:998
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:871
ExactFlagsTy ExactFlags
Definition VPlan.h:770
bool hasNoSignedWrap() const
Definition VPlan.h:1017
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1042
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:837
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:842
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:851
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:820
uint8_t GEPFlagsStorage
Definition VPlan.h:771
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:856
bool isNonNeg() const
Definition VPlan.h:1000
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:980
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:985
DisjointFlagsTy DisjointFlags
Definition VPlan.h:769
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:970
bool hasNoUnsignedWrap() const
Definition VPlan.h:1006
FCmpFlagsTy FCmpFlags
Definition VPlan.h:774
NonNegFlagsTy NonNegFlags
Definition VPlan.h:772
bool isReductionInLoop() const
Definition VPlan.h:1060
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:882
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:919
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:861
uint8_t CmpPredStorage
Definition VPlan.h:766
RecurKind getRecurKind() const
Definition VPlan.h:1048
VPIRFlags(Instruction &I)
Definition VPlan.h:782
Instruction & getInstruction() const
Definition VPlan.h:1709
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1717
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1696
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1723
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1711
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1684
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1165
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1201
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1173
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1185
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1514
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1556
static bool classof(const VPUser *R)
Definition VPlan.h:1541
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1522
Type * getResultType() const
Definition VPlan.h:1562
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1545
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1220
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1446
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1466
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1387
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1326
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1317
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1333
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1307
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1320
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1260
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1311
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1255
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1252
@ VScale
Returns the value for vscale.
Definition VPlan.h:1329
@ CanonicalIVIncrementForPart
Definition VPlan.h:1236
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1263
bool hasResult() const
Definition VPlan.h:1411
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1469
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1451
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1491
unsigned getOpcode() const
Definition VPlan.h:1395
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1494
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1460
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1436
A common base class for interleaved memory operations.
Definition VPlan.h:2857
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2920
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2926
static bool classof(const VPUser *U)
Definition VPlan.h:2902
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2869
Instruction * getInsertPos() const
Definition VPlan.h:2924
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2897
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2922
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2914
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2943
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2908
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:3023
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3017
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3030
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3010
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2997
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2953
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2980
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2963
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2974
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2955
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1574
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1603
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1598
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4292
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1623
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1583
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1608
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1612
~VPPredInstPHIRecipe() override=default
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3478
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3489
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi nodes after merging back from ...
Definition VPlan.h:3474
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:401
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:548
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4493
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:559
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:475
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:553
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:524
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:403
const VPBasicBlock * getParent() const
Definition VPlan.h:476
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:529
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCurrentIterationPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:419
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:521
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:465
A VPValue defined by a recipe that produces one or more values.
Definition VPlanValue.h:309
LLVM_ABI_FOR_TEST VPRecipeValue(VPRecipeBase *Def, Value *UV=nullptr)
Definition VPlan.cpp:144
friend class VPValue
Definition VPlanValue.h:310
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3178
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3157
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3181
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3168
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2752
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2738
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2717
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2731
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2764
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2746
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2705
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2755
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2769
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2761
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2749
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:3046
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3055
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3120
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3089
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3104
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3131
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3133
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3116
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3069
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3118
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3075
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3122
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3129
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3124
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3083
static bool classof(const VPUser *U)
Definition VPlan.h:3094
static bool classof(const VPValue *VPV)
Definition VPlan.h:3099
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3138
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4358
const VPBlockBase * getEntry() const
Definition VPlan.h:4402
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4434
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4419
VPBlockBase * getExiting()
Definition VPlan.h:4415
const VPRegionValue * getCanonicalIV() const
Definition VPlan.h:4473
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4407
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4478
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4483
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
Definition VPlan.h:4486
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4470
const VPBlockBase * getExiting() const
Definition VPlan.h:4414
VPBlockBase * getEntry()
Definition VPlan.h:4403
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4427
friend class VPlan
Definition VPlan.h:4359
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4398
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:209
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3200
bool isSingleScalar() const
Definition VPlan.h:3248
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3208
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3260
bool isPredicated() const
Definition VPlan.h:3250
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3222
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3253
unsigned getOpcode() const
Definition VPlan.h:3272
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3267
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4064
VPValue * getStepValue() const
Definition VPlan.h:4037
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:4031
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:4004
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4050
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4016
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4045
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4041
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3997
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4058
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:605
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:611
static bool classof(const VPValue *V)
Definition VPlan.h:657
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:670
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:615
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:673
static bool classof(const VPUser *U)
Definition VPlan.h:662
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:607
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:1153
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:335
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1524
operand_range operands()
Definition VPlanValue.h:403
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:379
unsigned getNumOperands() const
Definition VPlanValue.h:373
operand_iterator op_end()
Definition VPlanValue.h:401
operand_iterator op_begin()
Definition VPlanValue.h:399
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:374
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:354
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:397
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:396
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:49
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:138
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:128
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:74
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:202
unsigned getNumUsers() const
Definition VPlanValue.h:113
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2188
VPValue * getVFValue() const
Definition VPlan.h:2177
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2174
int64_t getStride() const
Definition VPlan.h:2175
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2209
VPValue * getOffset() const
Definition VPlan.h:2178
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2202
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2164
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2195
VPValue * getPointer() const
Definition VPlan.h:2176
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2246
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2248
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2255
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2233
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHeaderPHIRecipe.
Definition VPlan.h:2271
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2262
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1988
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1995
const_operand_range args() const
Definition VPlan.h:2032
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2010
operand_range args()
Definition VPlan.h:2031
Function * getCalledScalarFunction() const
Definition VPlan.h:2027
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenCanonicalIVRecipe(VPRegionValue *CanonicalIV)
Definition VPlan.h:3883
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVPHIRecipe.
Definition VPlan.h:3900
VPRegionValue * getCanonicalIV() const
Return the canonical IV being widened.
Definition VPlan.h:3907
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3888
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1830
Instruction::CastOps getOpcode() const
Definition VPlan.h:1868
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1871
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1838
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1853
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2126
Type * getSourceElementType() const
Definition VPlan.h:2131
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2134
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2118
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2104
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2439
static bool classof(const VPValue *V)
Definition VPlan.h:2387
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2406
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2424
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2399
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2414
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2417
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2375
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2402
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2422
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2431
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2382
const VPValue * getVFValue() const
Definition VPlan.h:2409
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2392
const VPValue * getStepValue() const
Definition VPlan.h:2403
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2500
const TruncInst * getTruncInst() const
Definition VPlan.h:2516
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2494
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2504
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2486
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2460
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2515
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2469
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2531
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2511
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2524
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1882
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1913
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1959
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1968
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1899
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1974
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1934
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1971
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1962
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
A common mixin class for widening memory operations.
Definition VPlan.h:3505
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3516
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3539
virtual ~VPWidenMemoryRecipe()=default
Instruction & Ingredient
Definition VPlan.h:3507
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition VPlan.h:3561
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3513
virtual const VPRecipeBase * getAsRecipe() const =0
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3549
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3510
VPWidenMemoryRecipe(Instruction &I, bool Consecutive, const VPIRMetadata &Metadata)
Definition VPlan.h:3526
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3545
void setMask(VPValue *Mask)
Definition VPlan.h:3518
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3556
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3542
A recipe for widened phis.
Definition VPlan.h:2589
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2623
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2601
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
VPWidenPHIRecipe(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe with incoming values IncomingValues, debug location DL and Name.
Definition VPlan.h:2596
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2558
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2567
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2548
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1774
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1794
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1821
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1778
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1786
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1811
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4506
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4831
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1174
friend class VPSlotTracker
Definition VPlan.h:4508
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1150
bool hasVF(ElementCount VF) const
Definition VPlan.h:4729
ElementCount getSingleVF() const
Returns the single VF of the plan, asserting that the plan has exactly one VF.
Definition VPlan.h:4742
const DataLayout & getDataLayout() const
Definition VPlan.h:4711
LLVMContext & getContext() const
Definition VPlan.h:4707
VPBasicBlock * getEntry()
Definition VPlan.h:4602
Type * getIndexType() const
The type of the canonical induction variable of the vector loop.
Definition VPlan.h:4919
void setName(const Twine &newName)
Definition VPlan.h:4775
bool hasScalableVF() const
Definition VPlan.h:4730
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4665
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4686
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4736
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:899
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:874
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4793
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:907
const VPBasicBlock * getEntry() const
Definition VPlan.h:4603
friend class VPlanPrinter
Definition VPlan.h:4507
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4802
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4825
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4705
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:4808
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4880
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1309
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4834
bool hasUF(unsigned UF) const
Definition VPlan.h:4754
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4655
VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and vect...
Definition VPlan.h:4583
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4695
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4692
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4779
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:4868
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:4805
void setVF(ElementCount VF)
Definition VPlan.h:4717
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4770
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1065
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4902
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1047
LLVM_ABI_FOR_TEST bool isOuterLoop() const
Returns true if this VPlan is for an outer loop, i.e., its vector loop region contains a nested loop ...
Definition VPlan.cpp:1080
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4757
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4819
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4640
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4672
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4679
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4631
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4591
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4857
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1315
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4724
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4799
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4607
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1180
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4702
bool hasScalarVFOnly() const
Definition VPlan.h:4747
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4645
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:917
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1133
void addVF(ElementCount VF)
Definition VPlan.h:4715
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4651
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1089
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4698
void setUF(unsigned UF)
Definition VPlan.h:4762
const VPSymbolicValue & getVF() const
Definition VPlan.h:4699
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:4912
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1221
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4813
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2506
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:190
CastInfo helper for casting from VPRecipeBase to a mixin class that is not part of the VPRecipeBase c...
Definition VPlan.h:4077
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:557
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
LLVM_PACKED_END
Definition VPlan.h:1103
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2678
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:356
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:78
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
Definition VPlan.h:88
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2676
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:73
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:874
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
Provides a cast trait that strips const from types to make it easier to implement a const-version of ...
Definition Casting.h:388
This cast trait just provides the default implementation of doCastIfPossible to make CastInfo special...
Definition Casting.h:309
Provides a cast trait that uses a defined pointer to pointer cast as a base for reference-to-referenc...
Definition Casting.h:423
This reduction is in-loop.
Definition VPlan.h:2670
Possible variants of a reduction.
Definition VPlan.h:2668
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2673
unsigned VFScaleFactor
Definition VPlan.h:2674
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:334
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:258
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2639
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2651
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2630
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:717
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:722
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:712
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:705
An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use cast/dyn_cast/isa and exec...
Definition VPlan.h:1742
PHINode & getIRPhi()
Definition VPlan.h:1755
VPIRPhi(PHINode &PN)
Definition VPlan.h:1743
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1745
static bool classof(const VPUser *U)
Definition VPlan.h:1750
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1766
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:240
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:142
static bool classof(const VPUser *U)
Definition VPlan.h:1642
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1638
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1657
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1672
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1652
static bool classof(const VPValue *V)
Definition VPlan.h:1647
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1107
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1140
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1113
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1108
static bool classof(const VPValue *V)
Definition VPlan.h:1133
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1128
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:280
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3615
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3651
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
VPWidenLoadEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3624
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3650
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3631
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3616
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3641
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3567
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3568
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3592
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3602
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3575
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadRecipe.
Definition VPlan.h:3586
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3601
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3716
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3732
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3725
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3760
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3717
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3745
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3759
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3735
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3663
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3702
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3664
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3681
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3672
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3703
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreRecipe.
Definition VPlan.h:3687
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3693
static VPMixin * castFailed()
Definition VPlan.h:4095
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4086
static VPMixin * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4089