LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class SCEVPredicate;
61class Type;
62class VPBasicBlock;
63class VPBuilder;
64class VPDominatorTree;
65class VPRegionBlock;
66class VPlan;
67class VPLane;
69class Value;
71
72struct VPCostContext;
73
74using VPlanPtr = std::unique_ptr<VPlan>;
75
76/// \enum UncountableExitStyle
77/// Different methods of handling early exits.
78///
81 /// No side effects to worry about, so we can process any uncountable exits
82 /// in the loop and branch either to the middle block if the trip count was
83 /// reached, or an early exit block to determine which exit was taken.
85 /// All memory operations other than the load(s) required to determine whether
86 /// an uncountable exit occurred will be masked based on that condition. If an
87 /// uncountable exit is taken, then all lanes before the exiting lane will
88 /// complete, leaving just the final lane to execute in the scalar tail.
90};
91
92/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
93/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
95 friend class VPBlockUtils;
96
97 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
98
99 /// An optional name for the block.
100 std::string Name;
101
102 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
103 /// it is a topmost VPBlockBase.
104 VPRegionBlock *Parent = nullptr;
105
106 /// List of predecessor blocks.
108
109 /// List of successor blocks.
111
112 /// VPlan containing the block. Can only be set on the entry block of the
113 /// plan.
114 VPlan *Plan = nullptr;
115
116 /// Add \p Successor as the last successor to this block.
117 void appendSuccessor(VPBlockBase *Successor) {
118 assert(Successor && "Cannot add nullptr successor!");
119 Successors.push_back(Successor);
120 }
121
122 /// Add \p Predecessor as the last predecessor to this block.
123 void appendPredecessor(VPBlockBase *Predecessor) {
124 assert(Predecessor && "Cannot add nullptr predecessor!");
125 Predecessors.push_back(Predecessor);
126 }
127
128 /// Remove \p Predecessor from the predecessors of this block.
129 void removePredecessor(VPBlockBase *Predecessor) {
130 auto Pos = find(Predecessors, Predecessor);
131 assert(Pos && "Predecessor does not exist");
132 Predecessors.erase(Pos);
133 }
134
135 /// Remove \p Successor from the successors of this block.
136 void removeSuccessor(VPBlockBase *Successor) {
137 auto Pos = find(Successors, Successor);
138 assert(Pos && "Successor does not exist");
139 Successors.erase(Pos);
140 }
141
142 /// This function replaces one predecessor with another, useful when
143 /// trying to replace an old block in the CFG with a new one.
144 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
145 auto I = find(Predecessors, Old);
146 assert(I != Predecessors.end());
147 assert(Old->getParent() == New->getParent() &&
148 "replaced predecessor must have the same parent");
149 *I = New;
150 }
151
152 /// This function replaces one successor with another, useful when
153 /// trying to replace an old block in the CFG with a new one.
154 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
155 auto I = find(Successors, Old);
156 assert(I != Successors.end());
157 assert(Old->getParent() == New->getParent() &&
158 "replaced successor must have the same parent");
159 *I = New;
160 }
161
162protected:
163 VPBlockBase(const unsigned char SC, const std::string &N)
164 : SubclassID(SC), Name(N) {}
165
166public:
167 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
168 /// that are actually instantiated. Values of this enumeration are kept in the
169 /// SubclassID field of the VPBlockBase objects. They are used for concrete
170 /// type identification.
171 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
172
174
175 virtual ~VPBlockBase() = default;
176
177 const std::string &getName() const { return Name; }
178
179 void setName(const Twine &newName) { Name = newName.str(); }
180
181 /// \return an ID for the concrete type of this object.
182 /// This is used to implement the classof checks. This should not be used
183 /// for any other purpose, as the values may change as LLVM evolves.
184 unsigned getVPBlockID() const { return SubclassID; }
185
186 VPRegionBlock *getParent() { return Parent; }
187 const VPRegionBlock *getParent() const { return Parent; }
188
189 /// \return A pointer to the plan containing the current block.
190 VPlan *getPlan();
191 const VPlan *getPlan() const;
192
193 /// Sets the pointer of the plan containing the block. The block must be the
194 /// entry block into the VPlan.
195 void setPlan(VPlan *ParentPlan);
196
197 void setParent(VPRegionBlock *P) { Parent = P; }
198
199 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
200 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
201 /// VPBlockBase is a VPBasicBlock, it is returned.
202 const VPBasicBlock *getEntryBasicBlock() const;
203 VPBasicBlock *getEntryBasicBlock();
204
205 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
206 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
207 /// VPBlockBase is a VPBasicBlock, it is returned.
208 const VPBasicBlock *getExitingBasicBlock() const;
209 VPBasicBlock *getExitingBasicBlock();
210
211 const VPBlocksTy &getSuccessors() const { return Successors; }
212 VPBlocksTy &getSuccessors() { return Successors; }
213
214 /// Returns true if this block has any successors.
215 bool hasSuccessors() const { return !Successors.empty(); }
216 /// Returns true if this block has any predecessors.
217 bool hasPredecessors() const { return !Predecessors.empty(); }
218
221
222 const VPBlocksTy &getPredecessors() const { return Predecessors; }
223 VPBlocksTy &getPredecessors() { return Predecessors; }
224
225 /// \return the successor of this VPBlockBase if it has a single successor.
226 /// Otherwise return a null pointer.
228 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
229 }
230
231 /// \return the predecessor of this VPBlockBase if it has a single
232 /// predecessor. Otherwise return a null pointer.
234 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
235 }
236
237 size_t getNumSuccessors() const { return Successors.size(); }
238 size_t getNumPredecessors() const { return Predecessors.size(); }
239
240 /// An Enclosing Block of a block B is any block containing B, including B
241 /// itself. \return the closest enclosing block starting from "this", which
242 /// has successors. \return the root enclosing block if all enclosing blocks
243 /// have no successors.
244 VPBlockBase *getEnclosingBlockWithSuccessors();
245
246 /// \return the closest enclosing block starting from "this", which has
247 /// predecessors. \return the root enclosing block if all enclosing blocks
248 /// have no predecessors.
249 VPBlockBase *getEnclosingBlockWithPredecessors();
250
251 /// \return the successors either attached directly to this VPBlockBase or, if
252 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
253 /// successors of its own, search recursively for the first enclosing
254 /// VPRegionBlock that has successors and return them. If no such
255 /// VPRegionBlock exists, return the (empty) successors of the topmost
256 /// VPBlockBase reached.
258 return getEnclosingBlockWithSuccessors()->getSuccessors();
259 }
260
261 /// \return the hierarchical successor of this VPBlockBase if it has a single
262 /// hierarchical successor. Otherwise return a null pointer.
264 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
265 }
266
267 /// \return the predecessors either attached directly to this VPBlockBase or,
268 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
269 /// predecessors of its own, search recursively for the first enclosing
270 /// VPRegionBlock that has predecessors and return them. If no such
271 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
272 /// VPBlockBase reached.
274 return getEnclosingBlockWithPredecessors()->getPredecessors();
275 }
276
277 /// \return the hierarchical predecessor of this VPBlockBase if it has a
278 /// single hierarchical predecessor. Otherwise return a null pointer.
282
283 /// Set a given VPBlockBase \p Successor as the single successor of this
284 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
285 /// This VPBlockBase must have no successors.
287 assert(Successors.empty() && "Setting one successor when others exist.");
288 assert(Successor->getParent() == getParent() &&
289 "connected blocks must have the same parent");
290 appendSuccessor(Successor);
291 }
292
293 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
294 /// successors of this VPBlockBase. This VPBlockBase is not added as
295 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
296 /// successors.
297 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
298 assert(Successors.empty() && "Setting two successors when others exist.");
299 appendSuccessor(IfTrue);
300 appendSuccessor(IfFalse);
301 }
302
303 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
304 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
305 /// as successor of any VPBasicBlock in \p NewPreds.
307 assert(Predecessors.empty() && "Block predecessors already set.");
308 for (auto *Pred : NewPreds)
309 appendPredecessor(Pred);
310 }
311
312 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
313 /// This VPBlockBase must have no successors. This VPBlockBase is not added
314 /// as predecessor of any VPBasicBlock in \p NewSuccs.
316 assert(Successors.empty() && "Block successors already set.");
317 for (auto *Succ : NewSuccs)
318 appendSuccessor(Succ);
319 }
320
321 /// Remove all the predecessors of this block.
322 void clearPredecessors() { Predecessors.clear(); }
323
324 /// Remove all the successors of this block.
325 void clearSuccessors() { Successors.clear(); }
326
327 /// Swap predecessors of the block. The block must have exactly 2
328 /// predecessors.
330 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
331 std::swap(Predecessors[0], Predecessors[1]);
332 }
333
334 /// Swap successors of the block. The block must have exactly 2 successors.
335 // TODO: This should be part of introducing conditional branch recipes rather
336 // than being independent.
338 assert(Successors.size() == 2 && "must have 2 successors to swap");
339 std::swap(Successors[0], Successors[1]);
340 }
341
342 /// Returns the index for \p Pred in the blocks predecessors list.
343 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
344 assert(count(Predecessors, Pred) == 1 &&
345 "must have Pred exactly once in Predecessors");
346 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
347 }
348
349 /// Returns the index for \p Succ in the blocks successor list.
350 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
351 assert(count(Successors, Succ) == 1 &&
352 "must have Succ exactly once in Successors");
353 return std::distance(Successors.begin(), find(Successors, Succ));
354 }
355
356 /// The method which generates the output IR that correspond to this
357 /// VPBlockBase, thereby "executing" the VPlan.
358 virtual void execute(VPTransformState *State) = 0;
359
360 /// Return the cost of the block.
362
363#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
364 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
365 OS << getName();
366 }
367
368 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
369 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
370 /// consecutive numbers.
371 ///
372 /// Note that the numbering is applied to the whole VPlan, so printing
373 /// individual blocks is consistent with the whole VPlan printing.
374 virtual void print(raw_ostream &O, const Twine &Indent,
375 VPSlotTracker &SlotTracker) const = 0;
376
377 /// Print plain-text dump of this VPBlockBase to \p O.
378 void print(raw_ostream &O) const;
379
380 /// Print the successors of this block to \p O, prefixing all lines with \p
381 /// Indent.
382 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
383
384 /// Dump this VPBlockBase to dbgs().
385 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
386#endif
387
388 /// Clone the current block and its recipes without updating the operands of
389 /// the cloned recipes, including all blocks in the single-entry single-exit
390 /// region for VPRegionBlocks.
391 virtual VPBlockBase *clone() = 0;
392};
393
394/// VPRecipeBase is a base class modeling a sequence of one or more output IR
395/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
396/// and is responsible for deleting its defined values. Single-value
397 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
398/// VPRecipeBase and VPValue separately.
400 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
401 public VPDef,
402 public VPUser {
403 friend VPBasicBlock;
404 friend class VPBlockUtils;
405
406 /// Subclass identifier (for isa/dyn_cast).
407 const unsigned char SubclassID;
408
409 /// Each VPRecipe belongs to a single VPBasicBlock.
410 VPBasicBlock *Parent = nullptr;
411
412 /// The debug location for the recipe.
413 DebugLoc DL;
414
415public:
416 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
417 /// that is actually instantiated. Values of this enumeration are kept in the
418 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
419 /// type identification.
420 using VPRecipeTy = enum {
421 VPBranchOnMaskSC,
422 VPDerivedIVSC,
423 VPExpandSCEVSC,
424 VPExpressionSC,
425 VPIRInstructionSC,
426 VPInstructionSC,
427 VPInterleaveEVLSC,
428 VPInterleaveSC,
429 VPReductionEVLSC,
430 VPReductionSC,
431 VPReplicateSC,
432 VPScalarIVStepsSC,
433 VPVectorPointerSC,
434 VPVectorEndPointerSC,
435 VPWidenCallSC,
436 VPWidenCanonicalIVSC,
437 VPWidenCastSC,
438 VPWidenGEPSC,
439 VPWidenIntrinsicSC,
440 VPWidenMemIntrinsicSC,
441 VPWidenLoadEVLSC,
442 VPWidenLoadSC,
443 VPWidenStoreEVLSC,
444 VPWidenStoreSC,
445 VPWidenSC,
446 VPBlendSC,
447 VPHistogramSC,
448 // START: Phi-like recipes. Need to be kept together.
449 VPWidenPHISC,
450 VPPredInstPHISC,
451 // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
452 // VPHeaderPHIRecipe need to be kept together.
453 VPCurrentIterationPHISC,
454 VPActiveLaneMaskPHISC,
455 VPFirstOrderRecurrencePHISC,
456 VPWidenIntOrFpInductionSC,
457 VPWidenPointerInductionSC,
458 VPReductionPHISC,
459 // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
460 // END: Phi-like recipes
461 VPFirstPHISC = VPWidenPHISC,
462 VPFirstHeaderPHISC = VPCurrentIterationPHISC,
463 VPLastHeaderPHISC = VPReductionPHISC,
464 VPLastPHISC = VPReductionPHISC,
465 };
466
467 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
469 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
470
471 ~VPRecipeBase() override = default;
472
473 /// Clone the current recipe.
474 virtual VPRecipeBase *clone() = 0;
475
476 /// \return the VPBasicBlock which this VPRecipe belongs to.
477 VPBasicBlock *getParent() { return Parent; }
478 const VPBasicBlock *getParent() const { return Parent; }
479
480 /// \return the VPRegionBlock which the recipe belongs to.
481 VPRegionBlock *getRegion();
482 const VPRegionBlock *getRegion() const;
483
484 /// The method which generates the output IR instructions that correspond to
485 /// this VPRecipe, thereby "executing" the VPlan.
486 virtual void execute(VPTransformState &State) = 0;
487
488 /// Return the cost of this recipe, taking into account if the cost
489 /// computation should be skipped and the ForceTargetInstructionCost flag.
490 /// Also takes care of printing the cost for debugging.
492
493 /// Insert an unlinked recipe into a basic block immediately before
494 /// the specified recipe.
495 void insertBefore(VPRecipeBase *InsertPos);
496 /// Insert an unlinked recipe into \p BB immediately before the insertion
497 /// point \p IP;
498 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
499
500 /// Insert an unlinked Recipe into a basic block immediately after
501 /// the specified Recipe.
502 void insertAfter(VPRecipeBase *InsertPos);
503
504 /// Unlink this recipe from its current VPBasicBlock and insert it into
505 /// the VPBasicBlock that MovePos lives in, right after MovePos.
506 void moveAfter(VPRecipeBase *MovePos);
507
508 /// Unlink this recipe and insert into BB before I.
509 ///
510 /// \pre I is a valid iterator into BB.
511 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
512
513 /// This method unlinks 'this' from the containing basic block, but does not
514 /// delete it.
515 void removeFromParent();
516
517 /// This method unlinks 'this' from the containing basic block and deletes it.
518 ///
519 /// \returns an iterator pointing to the element after the erased one
521
522 /// \return an ID for the concrete type of this object.
523 unsigned getVPRecipeID() const { return SubclassID; }
524
525 /// Method to support type inquiry through isa, cast, and dyn_cast.
526 static inline bool classof(const VPDef *D) {
527 // All VPDefs are also VPRecipeBases.
528 return true;
529 }
530
531 static inline bool classof(const VPUser *U) { return true; }
532
533 /// Returns true if the recipe may have side-effects.
534 bool mayHaveSideEffects() const;
535
536 /// Return true if we can safely execute this recipe unconditionally even if
537 /// it is masked originally.
538 bool isSafeToSpeculativelyExecute() const;
539
540 /// Returns true for PHI-like recipes.
541 bool isPhi() const;
542
543 /// Returns true if the recipe may read from memory.
544 bool mayReadFromMemory() const;
545
546 /// Returns true if the recipe may write to memory.
547 bool mayWriteToMemory() const;
548
549 /// Returns true if the recipe may read from or write to memory.
550 bool mayReadOrWriteMemory() const {
552 }
553
554 /// Returns the debug location of the recipe.
555 DebugLoc getDebugLoc() const { return DL; }
556
557 /// Set the recipe's debug location to \p NewDL.
558 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
559
560#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
561 /// Dump the recipe to stderr (for debugging).
562 LLVM_ABI_FOR_TEST void dump() const;
563
564 /// Print the recipe, delegating to printRecipe().
565 void print(raw_ostream &O, const Twine &Indent,
567#endif
568
569protected:
570 /// Compute the cost of this recipe either using a recipe's specialized
571 /// implementation or using the legacy cost model and the underlying
572 /// instructions.
573 virtual InstructionCost computeCost(ElementCount VF,
574 VPCostContext &Ctx) const;
575
576#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
577 /// Each concrete VPRecipe prints itself, without printing common information,
578 /// like debug info or metadata.
579 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
580 VPSlotTracker &SlotTracker) const = 0;
581#endif
582};
583
// Helper macro that stamps out the classof overloads shared by recipes which
// are identified by a single recipe ID. Each overload answers whether its
// argument is (or is defined by) a recipe whose ID equals VPRecipeID.
#define VP_CLASSOF_IMPL(VPRecipeID)                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    if (auto *DefR = V->getDefiningRecipe())                                   \
      return DefR->getVPRecipeID() == VPRecipeID;                              \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    if (auto *UserR = dyn_cast<VPRecipeBase>(U))                               \
      return UserR->getVPRecipeID() == VPRecipeID;                             \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }
600
601/// Compute the scalar result type for an IR \p Opcode given \p Operands.
602LLVM_ABI Type *computeScalarTypeForInstruction(unsigned Opcode,
603 ArrayRef<VPValue *> Operands);
604
605/// VPSingleDefRecipe is a base class for recipes that model a sequence of one
606 /// or more output IR instructions that define a single result VPValue. Note that
607/// VPSingleDefRecipe must inherit from VPRecipeBase before VPSingleDefValue.
609 public VPSingleDefValue {
610public:
611 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
613 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this) {}
614
615 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
617 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV) {}
618
619 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
620 Type *ResultTy, Value *UV = nullptr,
622 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV, ResultTy) {}
623
624 static inline bool classof(const VPRecipeBase *R) {
625 switch (R->getVPRecipeID()) {
626 case VPRecipeBase::VPDerivedIVSC:
627 case VPRecipeBase::VPExpandSCEVSC:
628 case VPRecipeBase::VPExpressionSC:
629 case VPRecipeBase::VPInstructionSC:
630 case VPRecipeBase::VPReductionEVLSC:
631 case VPRecipeBase::VPReductionSC:
632 case VPRecipeBase::VPReplicateSC:
633 case VPRecipeBase::VPScalarIVStepsSC:
634 case VPRecipeBase::VPVectorPointerSC:
635 case VPRecipeBase::VPVectorEndPointerSC:
636 case VPRecipeBase::VPWidenCallSC:
637 case VPRecipeBase::VPWidenCanonicalIVSC:
638 case VPRecipeBase::VPWidenCastSC:
639 case VPRecipeBase::VPWidenGEPSC:
640 case VPRecipeBase::VPWidenIntrinsicSC:
641 case VPRecipeBase::VPWidenMemIntrinsicSC:
642 case VPRecipeBase::VPWidenSC:
643 case VPRecipeBase::VPBlendSC:
644 case VPRecipeBase::VPPredInstPHISC:
645 case VPRecipeBase::VPCurrentIterationPHISC:
646 case VPRecipeBase::VPActiveLaneMaskPHISC:
647 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
648 case VPRecipeBase::VPWidenPHISC:
649 case VPRecipeBase::VPWidenIntOrFpInductionSC:
650 case VPRecipeBase::VPWidenPointerInductionSC:
651 case VPRecipeBase::VPReductionPHISC:
652 case VPRecipeBase::VPWidenLoadEVLSC:
653 case VPRecipeBase::VPWidenLoadSC:
654 return true;
655 case VPRecipeBase::VPBranchOnMaskSC:
656 case VPRecipeBase::VPInterleaveEVLSC:
657 case VPRecipeBase::VPInterleaveSC:
658 case VPRecipeBase::VPIRInstructionSC:
659 case VPRecipeBase::VPWidenStoreEVLSC:
660 case VPRecipeBase::VPWidenStoreSC:
661 case VPRecipeBase::VPHistogramSC:
662 return false;
663 }
664 llvm_unreachable("Unhandled VPRecipeID");
665 }
666
667 static inline bool classof(const VPValue *V) {
668 auto *R = V->getDefiningRecipe();
669 return R && classof(R);
670 }
671
672 static inline bool classof(const VPUser *U) {
673 auto *R = dyn_cast<VPRecipeBase>(U);
674 return R && classof(R);
675 }
676
677 VPSingleDefRecipe *clone() override = 0;
678
679 /// Returns the underlying instruction.
686
687#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
688 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
690#endif
691};
692
693/// Class to record and manage LLVM IR flags.
696 enum class OperationType : unsigned char {
697 Cmp,
698 FCmp,
699 OverflowingBinOp,
700 Trunc,
701 DisjointOp,
702 PossiblyExactOp,
703 GEPOp,
704 FPMathOp,
705 NonNegOp,
706 ReductionOp,
707 Other
708 };
709
710public:
711 struct WrapFlagsTy {
712 char HasNUW : 1;
713 char HasNSW : 1;
714
716 };
717
719 char HasNUW : 1;
720 char HasNSW : 1;
721
723 };
724
729
731 char NonNeg : 1;
732 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
733 };
734
735private:
736 struct ExactFlagsTy {
737 char IsExact : 1;
738 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
739 };
740 struct FastMathFlagsTy {
741 char AllowReassoc : 1;
742 char NoNaNs : 1;
743 char NoInfs : 1;
744 char NoSignedZeros : 1;
745 char AllowReciprocal : 1;
746 char AllowContract : 1;
747 char ApproxFunc : 1;
748
749 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
750 };
751 /// Holds both the predicate and fast-math flags for floating-point
752 /// comparisons.
753 struct FCmpFlagsTy {
754 uint8_t CmpPredStorage;
755 FastMathFlagsTy FMFs;
756 };
757 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
758 struct ReductionFlagsTy {
759 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
760 // additional kinds.
761 unsigned char Kind : 6;
762 // TODO: Derive order/in-loop from plan and remove here.
763 unsigned char IsOrdered : 1;
764 unsigned char IsInLoop : 1;
765 FastMathFlagsTy FMFs;
766
767 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
768 FastMathFlags FMFs)
769 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
770 IsInLoop(IsInLoop), FMFs(FMFs) {}
771 };
772
773 OperationType OpType;
774
775 union {
780 ExactFlagsTy ExactFlags;
783 FastMathFlagsTy FMFs;
784 FCmpFlagsTy FCmpFlags;
785 ReductionFlagsTy ReductionFlags;
787 };
788
789public:
790 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
791
793 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
794 OpType = OperationType::FCmp;
796 FCmp->getPredicate());
797 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
798 FCmpFlags.FMFs = FCmp->getFastMathFlags();
799 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
800 OpType = OperationType::Cmp;
802 Op->getPredicate());
803 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
804 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
805 OpType = OperationType::DisjointOp;
806 DisjointFlags.IsDisjoint = Op->isDisjoint();
807 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
808 OpType = OperationType::OverflowingBinOp;
809 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
810 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
811 OpType = OperationType::Trunc;
812 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
813 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
814 OpType = OperationType::PossiblyExactOp;
815 ExactFlags.IsExact = Op->isExact();
816 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
817 OpType = OperationType::GEPOp;
818 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
819 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
820 "wrap flags truncated");
821 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
822 OpType = OperationType::NonNegOp;
823 NonNegFlags.NonNeg = PNNI->hasNonNeg();
824 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
825 OpType = OperationType::FPMathOp;
826 FMFs = Op->getFastMathFlags();
827 }
828 }
829
830 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
832 assert(getPredicate() == Pred && "predicate truncated");
833 }
834
836 : OpType(OperationType::FCmp), AllFlags() {
838 assert(getPredicate() == Pred && "predicate truncated");
839 FCmpFlags.FMFs = FMFs;
840 }
841
843 : OpType(OperationType::OverflowingBinOp), AllFlags() {
844 this->WrapFlags = WrapFlags;
845 }
846
848 : OpType(OperationType::Trunc), AllFlags() {
849 this->TruncFlags = TruncFlags;
850 }
851
852 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
853 this->FMFs = FMFs;
854 }
855
857 : OpType(OperationType::DisjointOp), AllFlags() {
858 this->DisjointFlags = DisjointFlags;
859 }
860
862 : OpType(OperationType::NonNegOp), AllFlags() {
863 this->NonNegFlags = NonNegFlags;
864 }
865
866 VPIRFlags(ExactFlagsTy ExactFlags)
867 : OpType(OperationType::PossiblyExactOp), AllFlags() {
868 this->ExactFlags = ExactFlags;
869 }
870
872 : OpType(OperationType::GEPOp), AllFlags() {
873 GEPFlagsStorage = GEPFlags.getRaw();
874 }
875
876 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
877 : OpType(OperationType::ReductionOp), AllFlags() {
878 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
879 }
880
882 OpType = Other.OpType;
883 AllFlags[0] = Other.AllFlags[0];
884 AllFlags[1] = Other.AllFlags[1];
885 }
886
887 /// Only keep flags also present in \p Other. \p Other must have the same
888 /// OpType as the current object.
889 void intersectFlags(const VPIRFlags &Other);
890
891 /// Drop all poison-generating flags.
893 // NOTE: This needs to be kept in-sync with
894 // Instruction::dropPoisonGeneratingFlags.
895 switch (OpType) {
896 case OperationType::OverflowingBinOp:
897 WrapFlags.HasNUW = false;
898 WrapFlags.HasNSW = false;
899 break;
900 case OperationType::Trunc:
901 TruncFlags.HasNUW = false;
902 TruncFlags.HasNSW = false;
903 break;
904 case OperationType::DisjointOp:
905 DisjointFlags.IsDisjoint = false;
906 break;
907 case OperationType::PossiblyExactOp:
908 ExactFlags.IsExact = false;
909 break;
910 case OperationType::GEPOp:
911 GEPFlagsStorage = 0;
912 break;
913 case OperationType::FPMathOp:
914 case OperationType::FCmp:
915 case OperationType::ReductionOp:
916 getFMFsRef().NoNaNs = false;
917 getFMFsRef().NoInfs = false;
918 break;
919 case OperationType::NonNegOp:
920 NonNegFlags.NonNeg = false;
921 break;
922 case OperationType::Cmp:
923 case OperationType::Other:
924 break;
925 }
926 }
927
928 /// Apply the IR flags to \p I.
929 void applyFlags(Instruction &I) const {
930 switch (OpType) {
931 case OperationType::OverflowingBinOp:
932 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
933 I.setHasNoSignedWrap(WrapFlags.HasNSW);
934 break;
935 case OperationType::Trunc:
936 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
937 I.setHasNoSignedWrap(TruncFlags.HasNSW);
938 break;
939 case OperationType::DisjointOp:
940 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
941 break;
942 case OperationType::PossiblyExactOp:
943 I.setIsExact(ExactFlags.IsExact);
944 break;
945 case OperationType::GEPOp:
946 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
948 break;
949 case OperationType::FPMathOp:
950 case OperationType::FCmp: {
951 const FastMathFlagsTy &F = getFMFsRef();
952 I.setHasAllowReassoc(F.AllowReassoc);
953 I.setHasNoNaNs(F.NoNaNs);
954 I.setHasNoInfs(F.NoInfs);
955 I.setHasNoSignedZeros(F.NoSignedZeros);
956 I.setHasAllowReciprocal(F.AllowReciprocal);
957 I.setHasAllowContract(F.AllowContract);
958 I.setHasApproxFunc(F.ApproxFunc);
959 break;
960 }
961 case OperationType::NonNegOp:
962 I.setNonNeg(NonNegFlags.NonNeg);
963 break;
964 case OperationType::ReductionOp:
965 llvm_unreachable("reduction ops should not use applyFlags");
966 case OperationType::Cmp:
967 case OperationType::Other:
968 break;
969 }
970 }
971
973 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
974 "recipe doesn't have a compare predicate");
975 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
978 }
979
981 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
982 "recipe doesn't have a compare predicate");
983 if (OpType == OperationType::FCmp)
985 else
987 assert(getPredicate() == Pred && "predicate truncated");
988 }
989
993
994 /// Returns true if the recipe has a comparison predicate.
995 bool hasPredicate() const {
996 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
997 }
998
999 /// Returns true if the recipe has fast-math flags.
1000 bool hasFastMathFlags() const {
1001 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
1002 OpType == OperationType::ReductionOp;
1003 }
1004
1006
1007 /// Returns true if the recipe has non-negative flag.
1008 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
1009
/// Returns the stored non-negative flag. Asserts that the operation type is
/// NonNegOp, since the flag is read from NonNegFlags.
1010 bool isNonNeg() const {
1011 assert(OpType == OperationType::NonNegOp &&
1012 "recipe doesn't have a NNEG flag");
1013 return NonNegFlags.NonNeg;
1014 }
1015
1016 bool hasNoUnsignedWrap() const {
1017 switch (OpType) {
1018 case OperationType::OverflowingBinOp:
1019 return WrapFlags.HasNUW;
1020 case OperationType::Trunc:
1021 return TruncFlags.HasNUW;
1022 default:
1023 llvm_unreachable("recipe doesn't have a NUW flag");
1024 }
1025 }
1026
1027 bool hasNoSignedWrap() const {
1028 switch (OpType) {
1029 case OperationType::OverflowingBinOp:
1030 return WrapFlags.HasNSW;
1031 case OperationType::Trunc:
1032 return TruncFlags.HasNSW;
1033 default:
1034 llvm_unreachable("recipe doesn't have a NSW flag");
1035 }
1036 }
1037
1038 bool hasNoWrapFlags() const {
1039 switch (OpType) {
1040 case OperationType::OverflowingBinOp:
1041 case OperationType::Trunc:
1042 return true;
1043 default:
1044 return false;
1045 }
1046 }
1047
1049 return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1050 }
1051
1052 bool isDisjoint() const {
1053 assert(OpType == OperationType::DisjointOp &&
1054 "recipe cannot have a disjoing flag");
1055 return DisjointFlags.IsDisjoint;
1056 }
1057
1059 assert(OpType == OperationType::ReductionOp &&
1060 "recipe doesn't have reduction flags");
1061 return static_cast<RecurKind>(ReductionFlags.Kind);
1062 }
1063
/// Returns the IsOrdered bit of the reduction flags. Asserts that the
/// operation type is ReductionOp.
1064 bool isReductionOrdered() const {
1065 assert(OpType == OperationType::ReductionOp &&
1066 "recipe doesn't have reduction flags");
1067 return ReductionFlags.IsOrdered;
1068 }
1069
/// Returns the IsInLoop bit of the reduction flags. Asserts that the
/// operation type is ReductionOp.
1070 bool isReductionInLoop() const {
1071 assert(OpType == OperationType::ReductionOp &&
1072 "recipe doesn't have reduction flags");
1073 return ReductionFlags.IsInLoop;
1074 }
1075
1076private:
1077 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1078 FastMathFlagsTy &getFMFsRef() {
1079 if (OpType == OperationType::FCmp)
1080 return FCmpFlags.FMFs;
1081 if (OpType == OperationType::ReductionOp)
1082 return ReductionFlags.FMFs;
1083 return FMFs;
1084 }
1085 const FastMathFlagsTy &getFMFsRef() const {
1086 if (OpType == OperationType::FCmp)
1087 return FCmpFlags.FMFs;
1088 if (OpType == OperationType::ReductionOp)
1089 return ReductionFlags.FMFs;
1090 return FMFs;
1091 }
1092
1093public:
1094 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1095 /// otherwise. Opcodes not supporting default flags include compares and
1096 /// ComputeReductionResult.
1097 static VPIRFlags getDefaultFlags(unsigned Opcode);
1098
1099#if !defined(NDEBUG)
1100 /// Returns true if the set flags are valid for \p Opcode.
1101 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1102
1103 /// Returns true if \p Opcode has its required flags set.
1104 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1105#endif
1106
1107#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1108 void printFlags(raw_ostream &O) const;
1109#endif
1110};
1112
1113static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1114
1115/// A pure-virtual common base class for recipes defining a single VPValue and
1116/// using IR flags.
1118 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1119 const VPIRFlags &Flags,
1121 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1122
1123 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1124 Type *ResultTy, const VPIRFlags &Flags,
1126 : VPSingleDefRecipe(SC, Operands, ResultTy, /*UV=*/nullptr, DL),
1127 VPIRFlags(Flags) {}
1128
1129 static inline bool classof(const VPRecipeBase *R) {
1130 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1131 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1132 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1133 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1134 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1135 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1136 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1137 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC ||
1138 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1139 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1140 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1141 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1142 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC ||
1143 R->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC;
1144 }
1145
1146 static inline bool classof(const VPUser *U) {
1147 auto *R = dyn_cast<VPRecipeBase>(U);
1148 return R && classof(R);
1149 }
1150
1151 static inline bool classof(const VPValue *V) {
1152 auto *R = V->getDefiningRecipe();
1153 return R && classof(R);
1154 }
1155
1157
1158 static inline bool classof(const VPSingleDefRecipe *R) {
1159 return classof(static_cast<const VPRecipeBase *>(R));
1160 }
1161
1162 void execute(VPTransformState &State) override = 0;
1163
1164 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1166 VPCostContext &Ctx) const;
1167};
1168
1169/// Helper to manage IR metadata for recipes. It filters out metadata that
1170/// cannot be propagated.
1173
1174public:
1175 VPIRMetadata() = default;
1176
1177 /// Adds metadata that can be preserved from the original instruction
1178 /// \p I.
1180
1181 /// Copy constructor for cloning.
1183
1185
1186 /// Add all metadata to \p I.
1187 void applyMetadata(Instruction &I) const;
1188
1189 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1190 /// already exists, it will be replaced. Otherwise, it will be added.
1191 void setMetadata(unsigned Kind, MDNode *Node) {
1192 auto It =
1193 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1194 return P.first == Kind;
1195 });
1196 if (It != Metadata.end())
1197 It->second = Node;
1198 else
1199 Metadata.emplace_back(Kind, Node);
1200 }
1201
1202 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1203 /// nodes that are common to both.
1204 void intersect(const VPIRMetadata &MD);
1205
1206 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1207 MDNode *getMetadata(unsigned Kind) const {
1208 auto It =
1209 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1210 return It != Metadata.end() ? It->second : nullptr;
1211 }
1212
1213#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1214 /// Print metadata with node IDs.
1215 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1216#endif
1217};
1218
1219/// This is a concrete Recipe that models a single VPlan-level instruction.
1220/// While as any Recipe it may generate a sequence of IR instructions when
1221/// executed, these instructions would always form a single-def expression as
1222/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1223/// opcodes can take an optional mask. Masks may be assigned during
1224/// predication.
1226 public VPIRMetadata {
1227public:
1228 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1229 enum {
1231 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1232 // values of a first-order recurrence.
1234 // Creates a mask where each lane is active (true) whilst the current
1235 // counter (first operand + index) is less than the second operand. i.e.
1236 // mask[i] = icmp ult (op0 + i), op1
1237 // The size of the mask returned is VF * Multiplier (UF, third op).
1240 // Represents the incoming loop-invariant alias-mask. All memory accesses
1241 // in the loop must stay within the active lanes.
1244 // Increment the canonical IV separately for each unrolled part.
1246 // Abstract instruction that compares two values and branches. This is
1247 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1250 // Branch with 2 boolean condition operands and 3 successors. If condition
1251 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1252 // successor 1; otherwise branches to successor 2. Expanded after region
1253 // dissolution into: (1) an OR of the two conditions branching to
1254 // middle.split or successor 2, and (2) middle.split branching to successor
1255 // 0 or successor 1 based on condition 0.
1258 /// Given operands of (the same) struct type, creates a struct of fixed-
1259 /// width vectors each containing a struct field of all operands. The
1260 /// number of operands matches the element count of every vector.
1262 /// Creates a fixed-width vector containing all operands. The number of
1263 /// operands matches the vector element count.
1265 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1266 /// abstract VPInstruction whose single defined VPValue represents VF
1267 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1268 /// VPInstructions.
1270 /// Reduce the operands to the final reduction result using the operation
1271 /// specified via the operation's VPIRFlags.
1273 // Extracts the last part of its operand. Removed during unrolling.
1275 // Extracts the last lane of its vector operand, per part.
1277 // Extracts the second-to-last lane from its operand or the second-to-last
1278 // part if it is scalar. In the latter case, the recipe will be removed
1279 // during unrolling.
1281 LogicalAnd, // Non-poison propagating logical And.
1282 LogicalOr, // Non-poison propagating logical Or.
1283 NumActiveLanes, // Counts the number of active lanes in a mask.
1284 // Add an offset in bytes (second operand) to a base pointer (first
1285 // operand). Only generates scalar values (either for the first lane only or
1286 // for all lanes, depending on its uses).
1288 // Add a vector offset in bytes (second operand) to a scalar base pointer
1289 // (first operand).
1291 // Returns a scalar boolean value, which is true if any lane of its
1292 // (boolean) vector operands is true. It produces the reduced value across
1293 // all unrolled iterations. Unrolling will add all copies of its original
1294 // operand as additional operands. AnyOf is poison-safe as all operands
1295 // will be frozen.
1297 // Calculates the first active lane index of the vector predicate operands.
1298 // It produces the lane index across all unrolled iterations. Unrolling will
1299 // add all copies of its original operand as additional operands.
1300 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1301 // result even with operands that are all zeroes.
1303 // Calculates the last active lane index of the vector predicate operands.
1304 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1305 // tail-folding to extract the correct live-out value from the last active
1306 // iteration. It produces the lane index across all unrolled iterations.
1307 // Unrolling will add all copies of its original operand as additional
1308 // operands.
1310 // Returns a reversed vector for the operand.
1312 /// Start vector for reductions with 3 operands: the original start value,
1313 /// the identity value for the reduction and an integer indicating the
1314 /// scaling factor.
1316 /// Extracts a single lane (first operand) from a set of vector operands.
1317 /// The lane specifies an index into a vector formed by combining all vector
1318 /// operands (all operands after the first one).
1320 /// Explicit user for the resume phi of the canonical induction in the main
1321 /// VPlan, used by the epilogue vector loop.
1323 /// Extracts the last active lane from a set of vectors. The first operand
1324 /// is the default value if no lanes in the masks are active. Conceptually,
1325 /// this concatenates all data vectors (odd operands), concatenates all
1326 /// masks (even operands -- ignoring the default value), and returns the
1327 /// last active value from the combined data vector using the combined mask.
1329 /// Compute the exiting value of a wide induction after vectorization, that
1330 /// is the value of the last lane of the induction increment (i.e. its
1331 /// backedge value). Has the wide induction recipe as operand.
1334
1335 // The opcodes below are used for VPInstructionWithType.
1336 // NOTE: VPInstructionWithType classes are also used for:
1337 // 1. All CastInst variants - see createVPInstructionsForVPBB, and other
1338 // cases where createScalarCast, createScalarZExtOrTrunc and
1339 // createScalarSExtOrTrunc are invoked.
1340 // 2. Scalar load instructions - see createVPInstructionsForVPBB.
1341
1342 /// Scale the first operand (vector step) by the second operand
1343 /// (scalar-step). Casts both operands to the result type if needed.
1345 // Creates a step vector starting from 0 to VF with a step of 1.
1347 /// Returns the value for vscale.
1349
1351 };
1352
1353 /// Returns true if this VPInstruction generates scalar values for all lanes.
1354 /// Most VPInstructions generate a single value per part, either vector or
1355 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1356 /// values per all lanes, stemming from an original ingredient. This method
1357 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1358 /// underlying ingredient.
1359 bool doesGeneratePerAllLanes() const;
1360
1361 /// Return the number of operands determined by the opcode of the
1362 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1363 /// cannot be determined directly by the opcode.
1364 unsigned getNumOperandsForOpcode() const;
1365
1366private:
1367 typedef unsigned char OpcodeTy;
1368 OpcodeTy Opcode;
1369
1370 /// An optional name that can be used for the generated IR instruction.
1371 std::string Name;
1372
1373 /// Returns true if we can generate a scalar for the first lane only if
1374 /// needed.
1375 bool canGenerateScalarForFirstLane() const;
1376
1377 /// Utility methods serving execute(): generates a single vector instance of
1378 /// the modeled instruction. \returns the generated value. In some cases an
1379 /// existing value is returned rather than a generated one.
1380 Value *generate(VPTransformState &State);
1381
1382 /// Returns true if the VPInstruction does not need masking.
1383 bool alwaysUnmasked() const {
1384 if (Opcode == VPInstruction::MaskedCond)
1385 return false;
1386
1387 // For now only VPInstructions with underlying values use masks.
1388 // TODO: provide masks to VPInstructions w/o underlying values.
1389 if (!getUnderlyingValue())
1390 return true;
1391
1392 return Instruction::isCast(Opcode) || Opcode == Instruction::PHI ||
1393 Opcode == Instruction::GetElementPtr;
1394 }
1395
1396public:
1397 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1398 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1399 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "",
1400 Type *ResultTy = nullptr);
1401
1402 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1403
1404 VPInstruction *clone() override {
1406 }
1407
1409 Type *ResultTy = nullptr) {
1410 auto *New = new VPInstruction(Opcode, NewOperands, *this, *this,
1411 getDebugLoc(), Name, ResultTy);
1412 if (getUnderlyingValue())
1413 New->setUnderlyingValue(getUnderlyingInstr());
1414 return New;
1415 }
1416
1417 unsigned getOpcode() const { return Opcode; }
1418
1419 /// Add \p Op as operand of this VPInstruction. Only supported for AnyOf,
1420 /// ComputeReductionResult, BuildVector, BuildStructVector, ExtractLane,
1421 /// ExtractLastActive, FirstActiveLane, LastActiveLane.
1422 void addOperand(VPValue *Op);
1423
1424 /// Generate the instruction.
1425 /// TODO: We currently execute only per-part unless a specific instance is
1426 /// provided.
1427 void execute(VPTransformState &State) override;
1428
1429 /// Return the cost of this VPInstruction.
1430 InstructionCost computeCost(ElementCount VF,
1431 VPCostContext &Ctx) const override;
1432
1433#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1434 /// Print the VPInstruction to dbgs() (for debugging).
1435 LLVM_DUMP_METHOD void dump() const;
1436#endif
1437
1438 bool hasResult() const {
1439 // CallInst may or may not have a result, depending on the called function.
1440 // Conservatively assume that calls have results for now.
1441 switch (getOpcode()) {
1442 case Instruction::Ret:
1443 case Instruction::UncondBr:
1444 case Instruction::CondBr:
1445 case Instruction::Store:
1446 case Instruction::Switch:
1447 case Instruction::IndirectBr:
1448 case Instruction::Resume:
1449 case Instruction::CatchRet:
1450 case Instruction::Unreachable:
1451 case Instruction::Fence:
1452 case Instruction::AtomicRMW:
1456 return false;
1457 default:
1458 return true;
1459 }
1460 }
1461
1462 /// Returns true if the VPInstruction has a mask operand.
1463 bool isMasked() const {
1464 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1465 // VPInstructions without a fixed number of operands cannot be masked.
1466 if (NumOpsForOpcode == -1u)
1467 return false;
1468 return NumOpsForOpcode + 1 == getNumOperands();
1469 }
1470
1471 /// Returns the number of operands, excluding the mask if the VPInstruction is
1472 /// masked.
1473 unsigned getNumOperandsWithoutMask() const {
1474 return getNumOperands() - isMasked();
1475 }
1476
1477 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1478 void addMask(VPValue *Mask) {
1479 assert(!isMasked() && "recipe is already masked");
1480 if (alwaysUnmasked())
1481 return;
1482 assert(Mask->getScalarType()->isIntegerTy(1) &&
1483 "Mask must be an i1 (vector)");
1484 VPUser::addOperand(Mask);
1485 }
1486
1487 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1488 /// VPInstructions.
1489 VPValue *getMask() const {
1490 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1491 }
1492
1493 /// Returns an iterator range over the operands excluding the mask operand
1494 /// if present.
1501
1502 /// Returns true if the underlying opcode may read from or write to memory.
1503 bool opcodeMayReadOrWriteFromMemory() const;
1504
1505 /// Returns true if the recipe only uses the first lane of operand \p Op.
1506 bool usesFirstLaneOnly(const VPValue *Op) const override;
1507
1508 /// Returns true if the recipe only uses the first part of operand \p Op.
1509 bool usesFirstPartOnly(const VPValue *Op) const override;
1510
1511 /// Returns true if this VPInstruction produces a scalar value from a vector,
1512 /// e.g. by performing a reduction or extracting a lane.
1513 bool isVectorToScalar() const;
1514
1515 /// Returns true if this VPInstruction's operands are single scalars and the
1516 /// result is also a single scalar.
1517 bool isSingleScalar() const;
1518
1519 /// Returns the symbolic name assigned to the VPInstruction.
1520 StringRef getName() const { return Name; }
1521
1522 /// Set the symbolic name for the VPInstruction.
1523 void setName(StringRef NewName) { Name = NewName.str(); }
1524
1525protected:
1526#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1527 /// Print the VPInstruction to \p O.
1528 void printRecipe(raw_ostream &O, const Twine &Indent,
1529 VPSlotTracker &SlotTracker) const override;
1530#endif
1531};
1532
1533/// A specialization of VPInstruction augmenting it with a dedicated result
1534/// type, to be used when the opcode and operands of the VPInstruction don't
1535/// directly determine the result type. Note that there is no separate recipe ID
1536/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1537/// distinguished purely by the opcode.
1538/// TODO: Merge with VPInstruction, now that VPRecipeValue provides the type.
1540public:
1542 Type *ResultTy, const VPIRFlags &Flags = {},
1543 const VPIRMetadata &Metadata = {},
1545 const Twine &Name = "", Value *UV = nullptr)
1546 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name, ResultTy) {
1548 }
1549
1550 static inline bool classof(const VPRecipeBase *R) {
1551 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1552 // type information.
1553 auto *VPI = dyn_cast<VPInstruction>(R);
1554 if (!VPI)
1555 return false;
1556 unsigned Opc = VPI->getOpcode();
1558 return true;
1559 switch (Opc) {
1563 case Instruction::Load:
1564 return true;
1565 default:
1566 return false;
1567 }
1568 }
1569
1570 static inline bool classof(const VPUser *R) {
1572 }
1573
1574 VPInstruction *clone() override {
1575 auto *New =
1577 *this, *this, getDebugLoc(), getName());
1578 New->setUnderlyingValue(getUnderlyingValue());
1579 return New;
1580 }
1581
1582 void execute(VPTransformState &State) override;
1583
1584 /// Return the cost of this VPInstruction.
1586 VPCostContext &Ctx) const override;
1587
1588 Type *getResultType() const { return getScalarType(); }
1589
1590 /// Cast recipes always use scalars of their operand.
1591 bool usesScalars(const VPValue *Op) const override {
1593 return true;
1595 }
1596
1597protected:
1598#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1599 /// Print the recipe.
1600 void printRecipe(raw_ostream &O, const Twine &Indent,
1601 VPSlotTracker &SlotTracker) const override;
1602#endif
1603};
1604
1605/// Helper type to provide functions to access incoming values and blocks for
1606/// phi-like recipes.
1608protected:
1609 /// Return a VPRecipeBase* to the current object.
1610 virtual const VPRecipeBase *getAsRecipe() const = 0;
1611
1612public:
1613 virtual ~VPPhiAccessors() = default;
1614
1615 /// Returns the incoming VPValue with index \p Idx.
1616 VPValue *getIncomingValue(unsigned Idx) const {
1617 return getAsRecipe()->getOperand(Idx);
1618 }
1619
1620 /// Returns the incoming block with index \p Idx.
1621 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1622
1623 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1624 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1625
1626 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1627 /// block.
1628 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1629
1630 /// Returns the number of incoming values, also number of incoming blocks.
1631 virtual unsigned getNumIncoming() const {
1632 return getAsRecipe()->getNumOperands();
1633 }
1634
1635 /// Returns an iterator range over the incoming values.
1637 return make_range(getAsRecipe()->op_begin(),
1638 getAsRecipe()->op_begin() + getNumIncoming());
1639 }
1640
1642 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1643
1644 /// Returns an iterator range over the incoming blocks.
1646 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1647 return getIncomingBlock(Idx);
1648 };
1649 return map_range(index_range(0, getNumIncoming()), GetBlock);
1650 }
1651
1652 /// Returns an iterator range over pairs of incoming values and corresponding
1653 /// incoming blocks.
1659
1660 /// Removes the incoming value for \p IncomingBlock, which must be a
1661 /// predecessor.
1662 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1663
1664 /// Append \p IncomingV as an incoming value to the phi-like recipe.
/// Asserts that \p IncomingV has the same scalar type as the first existing
/// operand, if there is one.
1665 void addIncoming(VPValue *IncomingV) {
// getAsRecipe() returns a pointer to const, so constness is cast away to be
// able to append the operand.
1666 auto *R = const_cast<VPRecipeBase *>(getAsRecipe());
1667 assert((R->getNumOperands() == 0 ||
1668 IncomingV->getScalarType() == R->getOperand(0)->getScalarType()) &&
1669 "all incoming values must have the same type");
1670 R->addOperand(IncomingV);
1671 }
1672
1673#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1674 /// Print the recipe.
1676#endif
1677};
1678
1681 const Twine &Name = "", Type *ResultTy = nullptr)
1682 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name,
1683 ResultTy) {}
1684
1685 static inline bool classof(const VPUser *U) {
1686 auto *VPI = dyn_cast<VPInstruction>(U);
1687 return VPI && VPI->getOpcode() == Instruction::PHI;
1688 }
1689
1690 static inline bool classof(const VPValue *V) {
1691 auto *VPI = dyn_cast<VPInstruction>(V);
1692 return VPI && VPI->getOpcode() == Instruction::PHI;
1693 }
1694
1695 static inline bool classof(const VPSingleDefRecipe *SDR) {
1696 auto *VPI = dyn_cast<VPInstruction>(SDR);
1697 return VPI && VPI->getOpcode() == Instruction::PHI;
1698 }
1699
1700 VPPhi *clone() override {
1701 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1702 PhiR->setUnderlyingValue(getUnderlyingValue());
1703 return PhiR;
1704 }
1705
1706 void execute(VPTransformState &State) override;
1707
1708protected:
1709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1710 /// Print the recipe.
1711 void printRecipe(raw_ostream &O, const Twine &Indent,
1712 VPSlotTracker &SlotTracker) const override;
1713#endif
1714
1715 const VPRecipeBase *getAsRecipe() const override { return this; }
1716};
1717
1718 /// A recipe to wrap an original IR instruction not to be modified during
1719 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1720 /// Except for PHIs, VPIRInstructions cannot have any operands.
1722 Instruction &I;
1723
1724protected:
1725 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1726 /// subclasses may need to be created, e.g. VPIRPhi.
1728 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1729
1730public:
1731 ~VPIRInstruction() override = default;
1732
1733 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1734 /// VPIRInstruction.
1736
1737 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1738
1740 auto *R = create(I);
1741 for (auto *Op : operands())
1742 R->addOperand(Op);
1743 return R;
1744 }
1745
1746 void execute(VPTransformState &State) override;
1747
1748 /// Return the cost of this VPIRInstruction.
1750 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1751
1752 Instruction &getInstruction() const { return I; }
1753
1754 bool usesScalars(const VPValue *Op) const override {
1756 "Op must be an operand of the recipe");
1757 return true;
1758 }
1759
1760 bool usesFirstPartOnly(const VPValue *Op) const override {
1762 "Op must be an operand of the recipe");
1763 return true;
1764 }
1765
1766 bool usesFirstLaneOnly(const VPValue *Op) const override {
1768 "Op must be an operand of the recipe");
1769 return true;
1770 }
1771
1772protected:
1773#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1774 /// Print the recipe.
1775 void printRecipe(raw_ostream &O, const Twine &Indent,
1776 VPSlotTracker &SlotTracker) const override;
1777#endif
1778};
1779
1780/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1781 /// of cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1782/// allowed, and it is used to add a new incoming value for the single
1783/// predecessor VPBB.
1785 public VPPhiAccessors {
1787
1788 static inline bool classof(const VPRecipeBase *U) {
1789 auto *R = dyn_cast<VPIRInstruction>(U);
1790 return R && isa<PHINode>(R->getInstruction());
1791 }
1792
1793 static inline bool classof(const VPUser *U) {
1794 auto *R = dyn_cast<VPRecipeBase>(U);
1795 return R && classof(R);
1796 }
1797
1799
1800 void execute(VPTransformState &State) override;
1801
1802protected:
1803#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1804 /// Print the recipe.
1805 void printRecipe(raw_ostream &O, const Twine &Indent,
1806 VPSlotTracker &SlotTracker) const override;
1807#endif
1808
1809 const VPRecipeBase *getAsRecipe() const override { return this; }
1810};
1811
1812/// VPWidenRecipe is a recipe for producing a widened instruction using the
1813/// opcode and operands of the recipe. This recipe covers most of the
1814/// traditional vectorization cases where each recipe transforms into a
1815/// vectorized version of itself.
1817 public VPIRMetadata {
1818 unsigned Opcode;
1819
1820public:
1822 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1823 DebugLoc DL = {})
1824 : VPWidenRecipe(I.getOpcode(), Operands, Flags, Metadata, DL) {
1825 setUnderlyingValue(&I);
1826 }
1827
1828 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1829 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1830 DebugLoc DL = {})
1831 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands,
1832 computeScalarTypeForInstruction(Opcode, Operands),
1833 Flags, DL),
1834 VPIRMetadata(Metadata), Opcode(Opcode) {}
1835
1836 ~VPWidenRecipe() override = default;
1837
1839
1841 if (auto *UV = getUnderlyingValue())
1842 return new VPWidenRecipe(*cast<Instruction>(UV), NewOperands, *this,
1843 *this, getDebugLoc());
1844 return new VPWidenRecipe(Opcode, NewOperands, *this, *this, getDebugLoc());
1845 }
1846
1847 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1848
1849 /// Produce a widened instruction using the opcode and operands of the recipe,
1850 /// processing State.VF elements.
1851 void execute(VPTransformState &State) override;
1852
1853 /// Return the cost of this VPWidenRecipe.
1854 InstructionCost computeCost(ElementCount VF,
1855 VPCostContext &Ctx) const override;
1856
1857 unsigned getOpcode() const { return Opcode; }
1858
1859protected:
1860#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1861 /// Print the recipe.
1862 void printRecipe(raw_ostream &O, const Twine &Indent,
1863 VPSlotTracker &SlotTracker) const override;
1864#endif
1865
1866 /// Returns true if the recipe only uses the first lane of operand \p Op.
1867 bool usesFirstLaneOnly(const VPValue *Op) const override {
1869 "Op must be an operand of the recipe");
1870 return Opcode == Instruction::Select && Op == getOperand(0) &&
1871 Op->isDefinedOutsideLoopRegions();
1872 }
1873};
1874
1875/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1876/// TODO: Merge with VPWidenRecipe now that type is associated to every
1877/// VPRecipeValue.
1879 /// Cast instruction opcode.
1880 Instruction::CastOps Opcode;
1881
1882public:
1884 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1885 const VPIRMetadata &Metadata = {},
1887 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, ResultTy, Flags,
1888 DL),
1889 VPIRMetadata(Metadata), Opcode(Opcode) {
1890 assert(flagsValidForOpcode(Opcode) &&
1891 "Set flags not supported for the provided opcode");
1893 "Opcode requires specific flags to be set");
1895 }
1896
1897 ~VPWidenCastRecipe() override = default;
1898
1900 return new VPWidenCastRecipe(Opcode, getOperand(0), getScalarType(),
1902 *this, *this, getDebugLoc());
1903 }
1904
1905 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1906
1907 /// Produce widened copies of the cast.
1908 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1909
1910 /// Return the cost of this VPWidenCastRecipe.
1912 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1913
1914 Instruction::CastOps getOpcode() const { return Opcode; }
1915
1916protected:
1917#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1918 /// Print the recipe.
1919 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1920 VPSlotTracker &SlotTracker) const override;
1921#endif
1922};
1923
1924/// A recipe for widening vector intrinsics.
1926 /// ID of the vector intrinsic to widen.
1927 Intrinsic::ID VectorIntrinsicID;
1928
1929 /// True if the intrinsic may read from memory.
1930 bool MayReadFromMemory;
1931
1932 /// True if the intrinsic may write to memory.
1933 bool MayWriteToMemory;
1934
1935 /// True if the intrinsic may have side-effects.
1936 bool MayHaveSideEffects;
1937
1938protected:
1939 VPWidenIntrinsicRecipe(const unsigned char SC,
1940 Intrinsic::ID VectorIntrinsicID,
1941 ArrayRef<VPValue *> CallArguments, Type *Ty,
1942 const VPIRFlags &Flags = {},
1943 const VPIRMetadata &MD = {},
1945 : VPRecipeWithIRFlags(SC, CallArguments, Ty, Flags, DL), VPIRMetadata(MD),
1946 VectorIntrinsicID(VectorIntrinsicID) {
1947 LLVMContext &Ctx = Ty->getContext();
1948 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1949 MemoryEffects ME = Attrs.getMemoryEffects();
1950 MayReadFromMemory = !ME.onlyWritesMemory();
1951 MayWriteToMemory = !ME.onlyReadsMemory();
1952 MayHaveSideEffects = MayWriteToMemory ||
1953 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1954 !Attrs.hasAttribute(Attribute::WillReturn);
1955 }
1956
1957 /// Helper function to produce the widened intrinsic call.
1958 CallInst *createVectorCall(VPTransformState &State);
1959
1960public:
1962 ArrayRef<VPValue *> CallArguments, Type *Ty,
1963 const VPIRFlags &Flags = {},
1964 const VPIRMetadata &MD = {},
1966 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments, Ty,
1967 Flags, DL),
1968 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID),
1969 MayReadFromMemory(CI.mayReadFromMemory()),
1970 MayWriteToMemory(CI.mayWriteToMemory()),
1971 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1972 setUnderlyingValue(&CI);
1973 }
1974
1976 ArrayRef<VPValue *> CallArguments, Type *Ty,
1977 const VPIRFlags &Flags = {},
1978 const VPIRMetadata &Metadata = {},
1980 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenIntrinsicSC,
1981 VectorIntrinsicID, CallArguments, Ty, Flags,
1982 Metadata, DL) {}
1983
1984 ~VPWidenIntrinsicRecipe() override = default;
1985
1987 if (Value *CI = getUnderlyingValue())
1988 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1989 operands(), getScalarType(), *this,
1990 *this, getDebugLoc());
1991 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(),
1992 getScalarType(), *this, *this,
1993 getDebugLoc());
1994 }
1995
/// Method to support type inquiry through isa, cast, and dyn_cast. Accepts
/// recipes whose ID is either VPWidenIntrinsicSC or VPWidenMemIntrinsicSC.
1996 static inline bool classof(const VPRecipeBase *R) {
1997 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1998 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC;
1999 }
2000
/// A VPUser matches only if it is a VPRecipeBase that is accepted by
/// classof(const VPRecipeBase *).
2001 static inline bool classof(const VPUser *U) {
2002 auto *R = dyn_cast<VPRecipeBase>(U);
2003 return R && classof(R);
2004 }
2005
/// A VPValue matches only if it has a defining recipe and that recipe is
/// accepted by classof(const VPRecipeBase *). Values without a defining
/// recipe never match.
2006 static inline bool classof(const VPValue *V) {
2007 auto *R = V->getDefiningRecipe();
2008 return R && classof(R);
2009 }
2010
/// Forwards to classof(const VPRecipeBase *) by casting \p R to the recipe
/// base type.
2011 static inline bool classof(const VPSingleDefRecipe *R) {
2012 return classof(static_cast<const VPRecipeBase *>(R));
2013 }
2014
2015 /// Produce a widened version of the vector intrinsic.
2016 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
2017
2018 /// Compute the cost of a vector intrinsic with \p ID and \p Operands.
2021 const VPRecipeWithIRFlags &R,
2022 ElementCount VF, VPCostContext &Ctx);
2023
2024 /// Return the cost of this vector intrinsic.
2026 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
2027
2028 /// Return the ID of the intrinsic.
2029 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
2030
2031 /// Return the name of the intrinsic as a string.
2033
2034 /// Returns true if the intrinsic may read from memory.
2035 bool mayReadFromMemory() const { return MayReadFromMemory; }
2036
2037 /// Returns true if the intrinsic may write to memory.
2038 bool mayWriteToMemory() const { return MayWriteToMemory; }
2039
2040 /// Returns true if the intrinsic may have side-effects.
2041 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
2042
2043 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
2044
2045protected:
2046#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2047 /// Print the recipe.
2048 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
2049 VPSlotTracker &SlotTracker) const override;
2050#endif
2051};
2052
2053/// A recipe for widening vector memory intrinsics.
2055 /// Alignment information for this memory access.
2056 Align Alignment;
2057
2058public:
2059 // TODO: support StoreInst for strided store
2061 ArrayRef<VPValue *> CallArguments, Type *Ty,
2062 Align Alignment, const VPIRMetadata &MD = {},
2064 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenMemIntrinsicSC,
2065 VectorIntrinsicID, CallArguments, Ty, {}, MD,
2066 DL),
2067 Alignment(Alignment) {
2068 assert(VectorIntrinsicID == Intrinsic::experimental_vp_strided_load &&
2069 "Unexpected intrinsic");
2070 }
2071
2072 ~VPWidenMemIntrinsicRecipe() override = default;
2073
2076 getScalarType(), Alignment, *this,
2077 getDebugLoc());
2078 }
2079
2080 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenMemIntrinsicSC)
2081
2082 /// Produce a widened version of the vector memory intrinsic.
2083 void execute(VPTransformState &State) override;
2084
2085 /// Helper function for computing the cost of vector memory intrinsic.
2087 bool IsMasked, Align Alignment,
2088 VPCostContext &Ctx);
2089
2090 /// Return the cost of this vector memory intrinsic.
2092 VPCostContext &Ctx) const override;
2093};
2094
2095/// A recipe for widening Call instructions using library calls.
2097 public VPIRMetadata {
2098 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
2099 /// between a given VF and the chosen vectorized variant, so there will be a
2100 /// different VPlan for each VF with a valid variant.
2101 Function *Variant;
2102
2103public:
2105 ArrayRef<VPValue *> CallArguments,
2106 const VPIRFlags &Flags = {},
2107 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
2108 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments,
2109 toScalarizedTy(Variant->getReturnType()), Flags,
2110 DL),
2111 VPIRMetadata(Metadata), Variant(Variant) {
2112 setUnderlyingValue(UV);
2113 assert(
2114 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
2115 "last operand must be the called function");
2116 assert(cast<Function>(CallArguments.back()->getLiveInIRValue())
2117 ->getReturnType() == getScalarType() &&
2118 "Scalar type must match return type of called scalar function");
2119 }
2120
2121 ~VPWidenCallRecipe() override = default;
2122
2124 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
2125 *this, *this, getDebugLoc());
2126 }
2127
2128 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
2129
2130 /// Produce a widened version of the call instruction.
2131 void execute(VPTransformState &State) override;
2132
2133 /// Return the cost of this VPWidenCallRecipe.
2134 InstructionCost computeCost(ElementCount VF,
2135 VPCostContext &Ctx) const override;
2136
2137 /// Return the cost of widening a call using the vector function \p Variant.
2138 static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx);
2139
2143
2146
2147 /// Returns true if the recipe only uses the first lane of operand \p Op.
2148 bool usesFirstLaneOnly(const VPValue *Op) const override;
2149
2150protected:
2151#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2152 /// Print the recipe.
2153 void printRecipe(raw_ostream &O, const Twine &Indent,
2154 VPSlotTracker &SlotTracker) const override;
2155#endif
2156};
2157
2158/// A recipe representing a sequence of load -> update -> store as part of
2159/// a histogram operation. This means there may be aliasing between vector
2160/// lanes, which is handled by the llvm.experimental.vector.histogram family
2161/// of intrinsics. The only update operations currently supported are
2162/// 'add' and 'sub' where the other term is loop-invariant.
2164 /// Opcode of the update operation, currently either add or sub.
2165 unsigned Opcode;
2166
2167public:
2168 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2169 const VPIRMetadata &Metadata = {},
2171 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2172 VPIRMetadata(Metadata), Opcode(Opcode) {}
2173
2174 ~VPHistogramRecipe() override = default;
2175
2177 return new VPHistogramRecipe(Opcode, operands(), *this, getDebugLoc());
2178 }
2179
2180 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2181
2182 /// Produce a vectorized histogram operation.
2183 void execute(VPTransformState &State) override;
2184
2185 /// Return the cost of this VPHistogramRecipe.
2187 VPCostContext &Ctx) const override;
2188
2189 unsigned getOpcode() const { return Opcode; }
2190
2191 /// Return the mask operand if one was provided, or a null pointer if all
2192 /// lanes should be executed unconditionally.
2193 VPValue *getMask() const {
2194 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2195 }
2196
2197protected:
2198#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2199 /// Print the recipe.
2200 void printRecipe(raw_ostream &O, const Twine &Indent,
2201 VPSlotTracker &SlotTracker) const override;
2202#endif
2203};
2204
2205/// A recipe for handling GEP instructions.
2207 Type *SourceElementTy;
2208
2209public:
2210 VPWidenGEPRecipe(Type *SourceElementTy, ArrayRef<VPValue *> Operands,
2211 const VPIRFlags &Flags = {},
2213 GetElementPtrInst *UV = nullptr)
2214 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands,
2215 Operands[0]->getScalarType(), Flags, DL),
2216 SourceElementTy(SourceElementTy) {
2217 if (UV) {
2218 setUnderlyingValue(UV);
2221 assert(Metadata.empty() && "unexpected metadata on GEP");
2222 }
2223 }
2224
2225 ~VPWidenGEPRecipe() override = default;
2226
2232
2233 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2234
2235 /// This recipe generates a GEP instruction.
2236 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2237
2238 /// Generate the gep nodes.
2239 void execute(VPTransformState &State) override;
2240
2241 Type *getSourceElementType() const { return SourceElementTy; }
2242
2243 /// Return the cost of this VPWidenGEPRecipe.
2245 VPCostContext &Ctx) const override {
2246 // TODO: Compute accurate cost after retiring the legacy cost model.
2247 return 0;
2248 }
2249
2250 /// Returns true if the recipe only uses the first lane of operand \p Op.
2251 bool usesFirstLaneOnly(const VPValue *Op) const override;
2252
2253protected:
2254#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2255 /// Print the recipe.
2256 void printRecipe(raw_ostream &O, const Twine &Indent,
2257 VPSlotTracker &SlotTracker) const override;
2258#endif
2259};
2260
2261/// A recipe to compute a pointer to the last element of each part of a widened
2262/// memory access for widened memory accesses of SourceElementTy. Used for
2263/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2264/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2265/// unroller otherwise.
2267 Type *SourceElementTy;
2268
2269 /// The constant stride of the pointer computed by this recipe, expressed in
2270 /// units of SourceElementTy.
2271 int64_t Stride;
2272
2273public:
/// Create a recipe with operands {\p Ptr, \p VF}; the scalar type of \p Ptr
/// is passed on as the recipe's type. \p Stride is expressed in units of
/// \p SourceElementTy and must be negative (checked by the assertion below).
2274 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2275 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2276 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2277 Ptr->getScalarType(), GEPFlags, DL),
2278 SourceElementTy(SourceElementTy), Stride(Stride) {
2279 assert(Stride < 0 && "Stride must be negative");
2280 }
2281
2282 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2283
/// Returns the source element type stored at construction.
2284 Type *getSourceElementType() const { return SourceElementTy; }
/// Returns the constant stride, expressed in units of the source element type.
2285 int64_t getStride() const { return Stride; }
/// Returns the pointer operand (operand 0).
2286 VPValue *getPointer() const { return getOperand(0); }
/// Returns the VF operand (operand 1).
2287 VPValue *getVFValue() const { return getOperand(1); }
2289 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2290 }
2291
2292 /// Adds the offset operand to the recipe.
2293 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2294 void materializeOffset(unsigned Part = 0);
2295
2296 /// Append \p Offset as the offset operand. The offset is an integer index
2297 /// expressed in units of SourceElementTy.
2299 assert(Offset->getScalarType()->isIntegerTy() &&
2300 "offset must be an integer index");
2302 }
2303
2304 void execute(VPTransformState &State) override;
2305
2306 bool usesFirstLaneOnly(const VPValue *Op) const override {
2308 "Op must be an operand of the recipe");
2309 return true;
2310 }
2311
2312 /// Return the cost of this VPVectorEndPointerRecipe.
2314 VPCostContext &Ctx) const override {
2315 // TODO: Compute accurate cost after retiring the legacy cost model.
2316 return 0;
2317 }
2318
2319 /// Returns true if the recipe only uses the first part of operand \p Op.
2320 bool usesFirstPartOnly(const VPValue *Op) const override {
2322 "Op must be an operand of the recipe");
2323 assert(getNumOperands() <= 2 && "must have at most two operands");
2324 return true;
2325 }
2326
2328 auto *VEPR = new VPVectorEndPointerRecipe(
2331 if (auto *Offset = getOffset())
2332 VEPR->addOffset(Offset);
2333 return VEPR;
2334 }
2335
2336protected:
2337#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2338 /// Print the recipe.
2339 void printRecipe(raw_ostream &O, const Twine &Indent,
2340 VPSlotTracker &SlotTracker) const override;
2341#endif
2342};
2343
2344/// A recipe to compute the pointers for widened memory accesses of \p
2345/// SourceElementTy, with the \p Stride expressed in units of \p
2346/// SourceElementTy. Unrolling adds an extra \p VFxPart operand for unrolled
2347/// parts > 0 and it produces `GEP SourceElementTy Ptr, VFxPart * Stride`.
2349 Type *SourceElementTy;
2350
2351public:
/// Create a recipe with operands {\p Ptr, \p Stride}; the scalar type of
/// \p Ptr is passed on as the recipe's type. \p SourceElementTy is stored
/// and can be queried via getSourceElementType().
2352 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride,
2353 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2354 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC,
2355 ArrayRef<VPValue *>({Ptr, Stride}),
2356 Ptr->getScalarType(), GEPFlags, DL),
2357 SourceElementTy(SourceElementTy) {}
2358
2359 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2360
2361 VPValue *getStride() const { return getOperand(1); }
2362
2364 return getNumOperands() > 2 ? getOperand(2) : nullptr;
2365 }
2366
2367 /// Add the per-part offset (VFxPart) used for unrolled parts > 0.
2368 void addPerPartOffset(VPValue *VFxPart) {
2369 assert(VFxPart->getScalarType()->isIntegerTy() &&
2370 "per-part offset must be an integer index");
2371 VPUser::addOperand(VFxPart);
2372 }
2373
2374 void execute(VPTransformState &State) override;
2375
2376 Type *getSourceElementType() const { return SourceElementTy; }
2377
2378 bool usesFirstLaneOnly(const VPValue *Op) const override {
2380 "Op must be an operand of the recipe");
2381 return true;
2382 }
2383
2384 /// Returns true if the recipe only uses the first part of operand \p Op.
2385 bool usesFirstPartOnly(const VPValue *Op) const override {
2387 "Op must be an operand of the recipe");
2388 assert(getNumOperands() <= 2 && "must have at most two operands");
2389 return true;
2390 }
2391
2393 auto *Clone =
2394 new VPVectorPointerRecipe(getOperand(0), SourceElementTy, getStride(),
2396 if (auto *VFxPart = getVFxPart())
2397 Clone->addPerPartOffset(VFxPart);
2398 return Clone;
2399 }
2400
2401 /// Return the cost of this VPVectorPointerRecipe.
2403 VPCostContext &Ctx) const override {
2404 // TODO: Compute accurate cost after retiring the legacy cost model.
2405 return 0;
2406 }
2407
2408protected:
2409#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2410 /// Print the recipe.
2411 void printRecipe(raw_ostream &O, const Twine &Indent,
2412 VPSlotTracker &SlotTracker) const override;
2413#endif
2414};
2415
2416/// A pure virtual base class for all recipes modeling header phis, including
2417/// phis for first order recurrences, pointer inductions and reductions. The
2418/// start value is the first operand of the recipe and the incoming value from
2419/// the backedge is the second operand.
2420///
2421/// Inductions are modeled using the following sub-classes:
2422/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2423/// floating point inductions with arbitrary start and step values. Produces
2424/// a vector PHI per-part.
2425/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2426/// pointer induction. Produces either a vector PHI per-part or scalar values
2427/// per-lane based on the canonical induction.
2428/// * VPFirstOrderRecurrencePHIRecipe
2429/// * VPReductionPHIRecipe
2430/// * VPActiveLaneMaskPHIRecipe
2431/// * VPEVLBasedIVPHIRecipe
2432///
2433/// Note that the canonical IV is modeled as a VPRegionValue associated with
2434/// its loop region.
2436 public VPPhiAccessors {
2437protected:
2438 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2439 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2440 : VPHeaderPHIRecipe(VPRecipeID, UnderlyingInstr, Start,
2441 Start->getScalarType(), DL) {}
2442
2443 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2444 VPValue *Start, Type *ResultTy, DebugLoc DL)
2445 : VPSingleDefRecipe(VPRecipeID, Start, ResultTy, UnderlyingInstr, DL) {}
2446
2447 const VPRecipeBase *getAsRecipe() const override { return this; }
2448
2449public:
2450 ~VPHeaderPHIRecipe() override = default;
2451
2452 /// Method to support type inquiry through isa, cast, and dyn_cast.
2453 static inline bool classof(const VPRecipeBase *R) {
2454 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2455 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2456 }
/// Type inquiry on a VPValue, decided by its defining recipe.
/// NOTE(review): the result of getDefiningRecipe() is handed to isa<>
/// without a null check, unlike the `R && classof(R)` pattern used by other
/// classof(const VPValue *) overloads in this file -- confirm callers never
/// pass a value that has no defining recipe.
2457 static inline bool classof(const VPValue *V) {
2458 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2459 }
2460 static inline bool classof(const VPSingleDefRecipe *R) {
2461 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2462 }
2463
2464 /// Generate the phi nodes.
2465 void execute(VPTransformState &State) override = 0;
2466
2467 /// Return the cost of this header phi recipe.
2469 VPCostContext &Ctx) const override;
2470
2471 /// Returns the start value of the phi, if one is set.
2473 return getNumOperands() == 0 ? nullptr : getOperand(0);
2474 }
2476 return getNumOperands() == 0 ? nullptr : getOperand(0);
2477 }
2478
2479 /// Update the start value of the recipe.
2481
2482 /// Returns the incoming value from the loop backedge.
2484 return getOperand(1);
2485 }
2486
2487 /// Update the incoming value from the loop backedge.
2489
2490 /// Add \p V as the incoming value from the loop backedge.
2492 assert(getNumOperands() == 1 &&
2493 "backedge value must be appended right after construction");
2494 assert(V->getScalarType() == getScalarType() &&
2495 "backedge value must have the same type as the start value");
2497 }
2498
2499protected:
2500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2501 /// Print the recipe.
2502 void printRecipe(raw_ostream &O, const Twine &Indent,
2503 VPSlotTracker &SlotTracker) const override = 0;
2504#endif
2505};
2506
2507/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2508/// VPWidenPointerInductionRecipe), providing shared functionality, including
2509/// retrieving the step value, induction descriptor and original phi node.
2511 InductionDescriptor IndDesc;
2512
2513public:
/// Convenience constructor: delegates to the overload taking an explicit
/// result type, using the scalar type of \p Start as that type.
2514 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2515 VPValue *Step, const InductionDescriptor &IndDesc,
2516 DebugLoc DL)
2517 : VPWidenInductionRecipe(Kind, IV, Start, Step, IndDesc,
2518 Start->getScalarType(), DL) {}
2519
/// Construct the induction with an explicit \p ResultTy. \p Start is
/// forwarded to the header-phi base class and \p Step is appended afterwards,
/// which is why getStepValue() reads operand 1. A copy of \p IndDesc is kept.
2520 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2521 VPValue *Step, const InductionDescriptor &IndDesc,
2522 Type *ResultTy, DebugLoc DL)
2523 : VPHeaderPHIRecipe(Kind, IV, Start, ResultTy, DL), IndDesc(IndDesc) {
2524 addOperand(Step);
2525 }
2526
2527 /// After unrolling, append the splat-VF step (`VF * step`) and the value of
2528 /// the induction at the last unrolled part.
2529 void addUnrolledPartOperands(VPValue *SplatVFStep, VPValue *LastPart) {
2530 assert(LastPart->getScalarType() == getScalarType() &&
2531 "last-part value must match the induction recipe's scalar type");
2533 ? SplatVFStep->getScalarType()->isIntegerTy()
2534 : SplatVFStep->getScalarType() == getScalarType()) &&
2535 "splat-step must match the induction type for non-pointer "
2536 "inductions, or be an integer index for pointer inductions");
2537 VPUser::addOperand(SplatVFStep);
2538 VPUser::addOperand(LastPart);
2539 }
2540
/// Method to support type inquiry through isa, cast, and dyn_cast. Accepts
/// recipes whose ID is either VPWidenIntOrFpInductionSC or
/// VPWidenPointerInductionSC.
2541 static inline bool classof(const VPRecipeBase *R) {
2542 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2543 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2544 }
2545
2546 static inline bool classof(const VPValue *V) {
2547 auto *R = V->getDefiningRecipe();
2548 return R && classof(R);
2549 }
2550
2551 static inline bool classof(const VPSingleDefRecipe *R) {
2552 return classof(static_cast<const VPRecipeBase *>(R));
2553 }
2554
2555 void execute(VPTransformState &State) override = 0;
2556
2557 /// Returns the start value of the induction.
2559
2560 /// Returns the step value of the induction.
2562 const VPValue *getStepValue() const { return getOperand(1); }
2563
2564 /// Update the step value of the recipe.
2565 void setStepValue(VPValue *V) { setOperand(1, V); }
2566
2568 const VPValue *getVFValue() const { return getOperand(2); }
2569
2570 /// Returns the number of incoming values, also number of incoming blocks.
2571 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2572 /// incoming value, its start value.
2573 unsigned getNumIncoming() const override { return 1; }
2574
2575 /// Returns the underlying PHINode if one exists, or null otherwise.
2579
2580 /// Returns the induction descriptor for the recipe.
2581 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2582
2583 /// Returns the SCEV predicates associated with this induction.
2585 return IndDesc.getNoWrapPredicates();
2586 }
2587
2589 // TODO: All operands of base recipe must exist and be at same index in
2590 // derived recipe.
2592 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2593 }
2594
2595 /// Returns true if the recipe only uses the first lane of operand \p Op.
2596 bool usesFirstLaneOnly(const VPValue *Op) const override {
2598 "Op must be an operand of the recipe");
2599 // The recipe creates its own wide start value, so it only requests the
2600 // first lane of the operand.
2601 // TODO: Remove once creating the start value is modeled separately.
2602 return Op == getStartValue() || Op == getStepValue();
2603 }
2604};
2605
2606/// A recipe for handling phi nodes of integer and floating-point inductions,
2607/// producing their vector values. This is an abstract recipe and must be
2608/// converted to concrete recipes before executing.
2610 public VPIRFlags {
2611 TruncInst *Trunc;
2612
2613 // If this recipe is unrolled it will have 2 additional operands
// (appended via addUnrolledPartOperands) on top of the start, step and VF
// operands set up by the constructors, i.e. 5 operands in total.
2614 bool isUnrolled() const { return getNumOperands() == 5; }
2615
2616public:
2618 VPValue *VF, const InductionDescriptor &IndDesc,
2619 const VPIRFlags &Flags, DebugLoc DL)
2620 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2621 Start, Step, IndDesc, DL),
2622 VPIRFlags(Flags), Trunc(nullptr) {
2623 addOperand(VF);
2624 }
2625
2627 VPValue *VF, const InductionDescriptor &IndDesc,
2628 TruncInst *Trunc, const VPIRFlags &Flags,
2629 DebugLoc DL)
2630 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2631 Start, Step, IndDesc,
2632 Trunc ? Trunc->getType() : Start->getType(), DL),
2633 VPIRFlags(Flags), Trunc(Trunc) {
2634 addOperand(VF);
2636 if (Trunc)
2638 assert(Metadata.empty() && "unexpected metadata on Trunc");
2639 }
2640
2642
2648
2649 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2650
/// This abstract recipe must never be executed; reaching this function is a
/// bug. The message names the routine expected to expand it beforehand.
2651 void execute(VPTransformState &State) override {
2652 llvm_unreachable("cannot execute this recipe, should be expanded via "
2653 "expandVPWidenIntOrFpInductionRecipe");
2654 }
2655
2656 /// Returns the start value of the induction.
2658
2659 /// If the recipe has been unrolled, return the VPValue for the induction
2660 /// increment, otherwise return null.
2662 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2663 }
2664
2665 /// Returns the number of incoming values, also number of incoming blocks.
2666 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2667 /// incoming value, its start value.
2668 unsigned getNumIncoming() const override { return 1; }
2669
2670 /// Returns the TruncInst recorded for this induction at construction, or
2671 /// nullptr if there is none.
2672 TruncInst *getTruncInst() { return Trunc; }
2673 const TruncInst *getTruncInst() const { return Trunc; }
2674
2675 /// Returns true if the induction is canonical, i.e. starting at 0 and
2676 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2677 /// same type as the canonical induction.
2678 bool isCanonical() const;
2679
2680 /// Returns the VPValue representing the value of this induction at
2681 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2682 /// take place.
2684 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2685 }
2686
2687protected:
2688#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2689 /// Print the recipe.
2690 void printRecipe(raw_ostream &O, const Twine &Indent,
2691 VPSlotTracker &SlotTracker) const override;
2692#endif
2693};
2694
2696public:
2697 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2698 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2699 /// VF*UF.
2701 VPValue *NumUnrolledElems,
2702 const InductionDescriptor &IndDesc, DebugLoc DL)
2703 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2704 Start, Step, IndDesc, DL) {
2705 addOperand(NumUnrolledElems);
2706 }
2707
2709
2715
2716 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2717
2718 /// Not executable: this recipe is expected to be expanded (see the message
/// below) before execution, so reaching this function is a bug.
2719 void execute(VPTransformState &State) override {
2720 llvm_unreachable("cannot execute this recipe, should be expanded via "
2721 "expandVPWidenPointerInduction");
2722 };
2723
2724 /// Returns true if only scalar values will be generated.
2725 bool onlyScalarsGenerated(bool IsScalable);
2726
2727protected:
2728#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2729 /// Print the recipe.
2730 void printRecipe(raw_ostream &O, const Twine &Indent,
2731 VPSlotTracker &SlotTracker) const override;
2732#endif
2733};
2734
2735/// A recipe for widened phis. Incoming values are operands of the recipe and
2736/// their operand index corresponds to the incoming predecessor block. If the
2737/// recipe is placed in an entry block to a (non-replicate) region, it must have
2738/// exactly 2 incoming values, the first from the predecessor of the region and
2739/// the second from the exiting block of the region.
2741 public VPPhiAccessors {
2742 /// Name to use for the generated IR instruction for the widened phi.
2743 std::string Name;
2744
2745public:
2746 /// Create a new VPWidenPHIRecipe with incoming values \p IncomingValues,
2747 /// debug location \p DL and \p Name.
2749 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2750 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, IncomingValues,
2751 IncomingValues[0]->getScalarType(),
2752 /*UV=*/nullptr, DL),
2753 Name(Name.str()) {
2754 assert(all_of(IncomingValues,
2755 [this](VPValue *VPV) {
2756 return VPV->getScalarType() == getScalarType();
2757 }) &&
2758 "all incoming values must have the same type");
2759 }
2760
2762 return new VPWidenPHIRecipe(operands(), getDebugLoc(), Name);
2763 }
2764
2765 ~VPWidenPHIRecipe() override = default;
2766
2767 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2768
2769 /// Generate the phi/select nodes.
2770 void execute(VPTransformState &State) override;
2771
2772 /// Return the cost of this VPWidenPHIRecipe.
2774 VPCostContext &Ctx) const override;
2775
2776protected:
2777#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2778 /// Print the recipe.
2779 void printRecipe(raw_ostream &O, const Twine &Indent,
2780 VPSlotTracker &SlotTracker) const override;
2781#endif
2782
2783 const VPRecipeBase *getAsRecipe() const override { return this; }
2784};
2785
2786/// A recipe for handling first-order recurrence phis. The start value is the
2787/// first operand of the recipe and the incoming value from the backedge is the
2788/// second operand.
2791 VPValue &BackedgeValue)
2792 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2793 &Start) {
2794 addOperand(&BackedgeValue);
2795 }
2796
2797 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2798
2803
2804 void execute(VPTransformState &State) override;
2805
2806 /// Return the cost of this first-order recurrence phi recipe.
2808 VPCostContext &Ctx) const override;
2809
2810 /// Returns true if the recipe only uses the first lane of operand \p Op.
2811 bool usesFirstLaneOnly(const VPValue *Op) const override {
2813 "Op must be an operand of the recipe");
2814 return Op == getStartValue();
2815 }
2816
2817protected:
2818#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2819 /// Print the recipe.
2820 void printRecipe(raw_ostream &O, const Twine &Indent,
2821 VPSlotTracker &SlotTracker) const override;
2822#endif
2823};
2824
2825/// Possible variants of a reduction.
2826
2827/// This reduction is ordered and in-loop.
2828struct RdxOrdered {};
2829/// This reduction is in-loop.
2830struct RdxInLoop {};
2831/// This reduction is unordered with the partial result scaled down by some
2832/// factor.
2835};
2836using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2837
/// Select the ReductionStyle for a reduction: RdxOrdered if \p Ordered,
/// otherwise RdxInLoop if \p InLoop, otherwise RdxUnordered carrying
/// \p ScaleFactor as its VF scale factor. \p Ordered requires \p InLoop
/// (asserted); \p ScaleFactor is ignored for the two in-loop styles.
2838inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2839 unsigned ScaleFactor) {
2840 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2841 if (Ordered)
2842 return RdxOrdered{};
2843 if (InLoop)
2844 return RdxInLoop{};
2845 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2846}
2847
2848/// A recipe for handling reduction phis. The start value is the first operand
2849/// of the recipe and the incoming value from the backedge is the second
2850/// operand.
2852 /// The recurrence kind of the reduction.
2853 const RecurKind Kind;
2854
2855 ReductionStyle Style;
2856
2857 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2858 /// patterns for argmin/argmax).
2859 /// TODO: Also support cases where the phi itself has a single use, but its
2860 /// compare has multiple uses.
2861 bool HasUsesOutsideReductionChain;
2862
2863public:
2864 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2866 VPValue &BackedgeValue, ReductionStyle Style,
2867 const VPIRFlags &Flags,
2868 bool HasUsesOutsideReductionChain = false)
2869 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2870 VPIRFlags(Flags), Kind(Kind), Style(Style),
2871 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2872 addOperand(&BackedgeValue);
2873 }
2874
2875 ~VPReductionPHIRecipe() override = default;
2876
2878 VPValue *BackedgeValue) {
2879 return new VPReductionPHIRecipe(
2881 *Start, *BackedgeValue, Style, *this, HasUsesOutsideReductionChain);
2882 }
2883
2887
2888 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2889
2890 /// Generate the phi/select nodes.
2891 void execute(VPTransformState &State) override;
2892
2893 /// Get the factor that the VF of this recipe's output should be scaled by, or
2894 /// 1 if it isn't scaled.
2895 unsigned getVFScaleFactor() const {
2896 auto *Partial = std::get_if<RdxUnordered>(&Style);
2897 return Partial ? Partial->VFScaleFactor : 1;
2898 }
2899
2900 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2901 /// > 1.
2902 void setVFScaleFactor(unsigned ScaleFactor) {
2903 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2904 Style = RdxUnordered{ScaleFactor};
2905 }
2906
2907 /// Returns the recurrence kind of the reduction.
2908 RecurKind getRecurrenceKind() const { return Kind; }
2909
2910 /// Returns true, if the phi is part of an ordered reduction.
2911 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2912
2913 /// Returns true if the phi is part of an in-loop reduction.
2914 bool isInLoop() const {
2915 return std::holds_alternative<RdxInLoop>(Style) ||
2916 std::holds_alternative<RdxOrdered>(Style);
2917 }
2918
2919 /// Returns true if the reduction outputs a vector with a scaled down VF.
2920 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2921
2922 /// Returns true, if the phi is part of a multi-use reduction.
2924 return HasUsesOutsideReductionChain;
2925 }
2926
2927 /// Returns true if the recipe only uses the first lane of operand \p Op.
2928 bool usesFirstLaneOnly(const VPValue *Op) const override {
2930 "Op must be an operand of the recipe");
2931 return isOrdered() || isInLoop();
2932 }
2933
2934protected:
2935#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2936 /// Print the recipe.
2937 void printRecipe(raw_ostream &O, const Twine &Indent,
2938 VPSlotTracker &SlotTracker) const override;
2939#endif
2940};
2941
2942/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2943/// instructions.
2945public:
2946 /// The blend operation is a User of the incoming values and of their
2947 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2948 /// be omitted (implied by passing an odd number of operands) in which case
2949 /// all other incoming values are merged into it.
2951 const VPIRFlags &Flags, DebugLoc DL)
2952 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands,
2953 Operands[0]->getScalarType(), Flags, DL) {
2954 assert(Operands.size() >= 2 && "Expected at least two operands!");
2956 [this](unsigned I) {
2957 return getIncomingValue(I)->getScalarType() ==
2958 getScalarType();
2959 }) &&
2960 "all incoming values must have the same type");
2962 [this](unsigned I) {
2963 return getMask(I)->getScalarType()->isIntegerTy(1);
2964 }) &&
2965 "masks must be a bool");
2966 setUnderlyingValue(Phi);
2967 }
2968
2970
2973 NewOperands, *this, getDebugLoc());
2974 }
2975
2976 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2977
2978 /// A normalized blend is one that has an odd number of operands, whereby the
2979 /// first operand does not have an associated mask.
2980 bool isNormalized() const { return getNumOperands() % 2; }
2981
2982 /// Return the number of incoming values, taking into account when normalized
2983 /// the first incoming value will have no mask.
2984 unsigned getNumIncomingValues() const {
2985 return (getNumOperands() + isNormalized()) / 2;
2986 }
2987
2988 /// Return incoming value number \p Idx.
2989 VPValue *getIncomingValue(unsigned Idx) const {
2990 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2991 }
2992
2993 /// Return mask number \p Idx.
2994 VPValue *getMask(unsigned Idx) const {
2995 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2996 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2997 }
2998
2999 /// Set mask number \p Idx to \p V.
3000 void setMask(unsigned Idx, VPValue *V) {
3001 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
3002 assert(V->getScalarType()->isIntegerTy(1) && "Mask must be an i1 (vector)");
3003 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
3004 }
3005
3006 void execute(VPTransformState &State) override {
3007 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
3008 }
3009
3010 /// Return the cost of this VPBlendRecipe.
3011 InstructionCost computeCost(ElementCount VF,
3012 VPCostContext &Ctx) const override;
3013
3014 /// Returns true if the recipe only uses the first lane of operand \p Op.
3015 bool usesFirstLaneOnly(const VPValue *Op) const override;
3016
3017protected:
3018#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3019 /// Print the recipe.
3020 void printRecipe(raw_ostream &O, const Twine &Indent,
3021 VPSlotTracker &SlotTracker) const override;
3022#endif
3023};
3024
3025/// A common base class for interleaved memory operations.
3026/// An Interleaved memory operation is a memory access method that combines
3027/// multiple strided loads/stores into a single wide load/store with shuffles.
3028/// The first operand is the start address. The optional operands are, in order,
3029/// the stored values and the mask.
3031 public VPIRMetadata {
3033
3034 /// Indicates if the interleave group is in a conditional block and requires a
3035 /// mask.
3036 bool HasMask = false;
3037
3038 /// Indicates if gaps between members of the group need to be masked out or if
3039 /// unused gaps can be loaded speculatively.
3040 bool NeedsMaskForGaps = false;
3041
3042protected:
3043 VPInterleaveBase(const unsigned char SC,
3045 ArrayRef<VPValue *> Operands,
3046 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3047 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3048 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
3049 NeedsMaskForGaps(NeedsMaskForGaps) {
3050 // TODO: extend the masked interleaved-group support to reversed access.
3051 assert((!Mask || !IG->isReverse()) &&
3052 "Reversed masked interleave-group not supported.");
3053 if (StoredValues.empty()) {
3054 for (Instruction *Inst : IG->members()) {
3055 assert(!Inst->getType()->isVoidTy() && "must have result");
3056 new VPMultiDefValue(this, Inst, Inst->getType());
3057 }
3058 } else {
3059 for (auto *SV : StoredValues)
3060 addOperand(SV);
3061 }
3062 if (Mask) {
3063 HasMask = true;
3064 addOperand(Mask);
3065 }
3066 }
3067
3068public:
3069 VPInterleaveBase *clone() override = 0;
3070
3071 static inline bool classof(const VPRecipeBase *R) {
3072 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
3073 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
3074 }
3075
3076 static inline bool classof(const VPUser *U) {
3077 auto *R = dyn_cast<VPRecipeBase>(U);
3078 return R && classof(R);
3079 }
3080
3081 /// Return the address accessed by this recipe.
3082 VPValue *getAddr() const {
3083 return getOperand(0); // Address is the 1st, mandatory operand.
3084 }
3085
3086 /// Return the mask used by this recipe. Note that a full mask is represented
3087 /// by a nullptr.
3088 VPValue *getMask() const {
3089 // Mask is optional and the last operand.
3090 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
3091 }
3092
3093 /// Return true if the access needs a mask because of the gaps.
3094 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
3095
3097
3098 Instruction *getInsertPos() const { return IG->getInsertPos(); }
3099
3100 void execute(VPTransformState &State) override {
3101 llvm_unreachable("VPInterleaveBase should not be instantiated.");
3102 }
3103
3104 /// Return the cost of this recipe.
3105 InstructionCost computeCost(ElementCount VF,
3106 VPCostContext &Ctx) const override;
3107
3108 /// Returns true if the recipe only uses the first lane of operand \p Op.
3109 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
3110
3111 /// Returns the number of stored operands of this interleave group. Returns 0
3112 /// for load interleave groups.
3113 virtual unsigned getNumStoreOperands() const = 0;
3114
3115 /// Return the VPValues stored by this interleave group. If it is a load
3116 /// interleave group, return an empty ArrayRef.
3118 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
3120 }
3121};
3122
3123/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
3124/// or stores into one wide load/store and shuffles. The first operand of a
3125/// VPInterleave recipe is the address, followed by the stored values, followed
3126/// by an optional mask.
3128public:
3130 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3131 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3132 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
3133 Mask, NeedsMaskForGaps, MD, DL) {}
3134
3135 ~VPInterleaveRecipe() override = default;
3136
3140 needsMaskForGaps(), *this, getDebugLoc());
3141 }
3142
3143 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
3144
3145 /// Generate the wide load or store, and shuffles.
3146 void execute(VPTransformState &State) override;
3147
3148 bool usesFirstLaneOnly(const VPValue *Op) const override {
3150 "Op must be an operand of the recipe");
3151 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
3152 }
3153
3154 unsigned getNumStoreOperands() const override {
3155 return getNumOperands() - (getMask() ? 2 : 1);
3156 }
3157
3158protected:
3159#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3160 /// Print the recipe.
3161 void printRecipe(raw_ostream &O, const Twine &Indent,
3162 VPSlotTracker &SlotTracker) const override;
3163#endif
3164};
3165
3166/// A recipe for interleaved memory operations with vector-predication
3167/// intrinsics. The first operand is the address, the second operand is the
3168/// explicit vector length. Stored values and mask are optional operands.
3170public:
3172 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
3173 R.getInterleaveGroup(), {R.getAddr(), &EVL},
3174 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
3175 R.getDebugLoc()) {
3176 assert(!getInterleaveGroup()->isReverse() &&
3177 "Reversed interleave-group with tail folding is not supported.");
3178 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
3179 "supported for scalable vector.");
3180 }
3181
3182 ~VPInterleaveEVLRecipe() override = default;
3183
3185 llvm_unreachable("cloning not implemented yet");
3186 }
3187
3188 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3189
3190 /// The VPValue of the explicit vector length.
3191 VPValue *getEVL() const { return getOperand(1); }
3192
3193 /// Generate the wide load or store, and shuffles.
3194 void execute(VPTransformState &State) override;
3195
3196 /// The recipe only uses the first lane of the address, and EVL operand.
3197 bool usesFirstLaneOnly(const VPValue *Op) const override {
3199 "Op must be an operand of the recipe");
3200 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3201 Op == getEVL();
3202 }
3203
3204 unsigned getNumStoreOperands() const override {
3205 return getNumOperands() - (getMask() ? 3 : 2);
3206 }
3207
3208protected:
3209#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3210 /// Print the recipe.
3211 void printRecipe(raw_ostream &O, const Twine &Indent,
3212 VPSlotTracker &SlotTracker) const override;
3213#endif
3214};
3215
3216/// A recipe to represent inloop, ordered or partial reduction operations. It
3217/// performs a reduction on a vector operand into a scalar (vector in the case
3218/// of a partial reduction) value, and adds the result to a chain. The Operands
3219/// are {ChainOp, VecOp, [Condition]}.
3221
3222 /// The recurrence kind for the reduction in question.
3223 RecurKind RdxKind;
3224 /// Whether the reduction is conditional.
3225 bool IsConditional = false;
3226 ReductionStyle Style;
3227
3228protected:
3229 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3231 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3232 ReductionStyle Style, DebugLoc DL)
3233 : VPRecipeWithIRFlags(SC, Operands, Operands[0]->getScalarType(), FMFs,
3234 DL),
3235 RdxKind(RdxKind), Style(Style) {
3236 assert(all_of(Operands,
3237 [this](VPValue *VPV) {
3238 return VPV->getScalarType() == getScalarType() ||
3239 (isa<VPInstruction>(VPV) &&
3240 cast<VPInstruction>(VPV)->getOpcode() ==
3242 }) &&
3243 "all incoming values must have the same type");
3244 if (CondOp) {
3245 assert(CondOp->getScalarType()->isIntegerTy(1) &&
3246 "CondOp must be a bool");
3247 IsConditional = true;
3248 addOperand(CondOp);
3249 }
3251 }
3252
3253public:
3255 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3257 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3258 {ChainOp, VecOp}, CondOp, Style, DL) {}
3259
3261 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3263 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3264 {ChainOp, VecOp}, CondOp, Style, DL) {}
3265
3266 ~VPReductionRecipe() override = default;
3267
3269 return new VPReductionRecipe(RdxKind, getFastMathFlagsOrNone(),
3271 getCondOp(), Style, getDebugLoc());
3272 }
3273
3274 static inline bool classof(const VPRecipeBase *R) {
3275 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3276 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3277 }
3278
3279 static inline bool classof(const VPUser *U) {
3280 auto *R = dyn_cast<VPRecipeBase>(U);
3281 return R && classof(R);
3282 }
3283
3284 static inline bool classof(const VPValue *VPV) {
3285 const VPRecipeBase *R = VPV->getDefiningRecipe();
3286 return R && classof(R);
3287 }
3288
3289 static inline bool classof(const VPSingleDefRecipe *R) {
3290 return classof(static_cast<const VPRecipeBase *>(R));
3291 }
3292
3293 /// Generate the reduction in the loop.
3294 void execute(VPTransformState &State) override;
3295
3296 /// Return the cost of VPReductionRecipe.
3297 InstructionCost computeCost(ElementCount VF,
3298 VPCostContext &Ctx) const override;
3299
3300 /// Return the recurrence kind for the in-loop reduction.
3301 RecurKind getRecurrenceKind() const { return RdxKind; }
3302 /// Return true if the in-loop reduction is ordered.
3303 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
3304 /// Return true if the in-loop reduction is conditional.
3305 bool isConditional() const { return IsConditional; };
3306 /// Returns true if the reduction outputs a vector with a scaled down VF.
3307 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3308 /// Returns true if the reduction is in-loop.
3309 bool isInLoop() const {
3310 return std::holds_alternative<RdxInLoop>(Style) ||
3311 std::holds_alternative<RdxOrdered>(Style);
3312 }
3313 /// The VPValue of the scalar Chain being accumulated.
3314 VPValue *getChainOp() const { return getOperand(0); }
3315 /// The VPValue of the vector value to be reduced.
3316 VPValue *getVecOp() const { return getOperand(1); }
3317 /// The VPValue of the condition for the block.
3319 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3320 }
3321 /// Get the factor that the VF of this recipe's output should be scaled by, or
3322 /// 1 if it isn't scaled.
3323 unsigned getVFScaleFactor() const {
3324 auto *Partial = std::get_if<RdxUnordered>(&Style);
3325 return Partial ? Partial->VFScaleFactor : 1;
3326 }
3327
3328protected:
3329#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3330 /// Print the recipe.
3331 void printRecipe(raw_ostream &O, const Twine &Indent,
3332 VPSlotTracker &SlotTracker) const override;
3333#endif
3334};
3335
3336/// A recipe to represent inloop reduction operations with vector-predication
3337/// intrinsics, performing a reduction on a vector operand with the explicit
3338/// vector length (EVL) into a scalar value, and adding the result to a chain.
3339/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3341public:
3344 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3347 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3348 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3349 DL) {}
3350
3351 ~VPReductionEVLRecipe() override = default;
3352
3354 llvm_unreachable("cloning not implemented yet");
3355 }
3356
3357 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3358
3359 /// Generate the reduction in the loop
3360 void execute(VPTransformState &State) override;
3361
3362 /// The VPValue of the explicit vector length.
3363 VPValue *getEVL() const { return getOperand(2); }
3364
3365 /// Returns true if the recipe only uses the first lane of operand \p Op.
3366 bool usesFirstLaneOnly(const VPValue *Op) const override {
3368 "Op must be an operand of the recipe");
3369 return Op == getEVL();
3370 }
3371
3372protected:
3373#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3374 /// Print the recipe.
3375 void printRecipe(raw_ostream &O, const Twine &Indent,
3376 VPSlotTracker &SlotTracker) const override;
3377#endif
3378};
3379
3380/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3381/// copies of the original scalar type, one per lane, instead of producing a
3382/// single copy of widened type for all lanes. If the instruction is known to be
3383/// a single scalar, only one copy will be generated.
3385 public VPIRMetadata {
3386 /// Indicator if only a single replica per lane is needed.
3387 bool IsSingleScalar;
3388
3389 /// Indicator if the replicas are also predicated.
3390 bool IsPredicated;
3391
3392public:
3394 bool IsSingleScalar, VPValue *Mask = nullptr,
3395 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3396 DebugLoc DL = DebugLoc::getUnknown())
3397 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands,
3398 computeScalarType(I, Operands), Flags, DL),
3399 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3400 IsPredicated(Mask) {
3401 assert((!IsSingleScalar || !I->isCast()) &&
3402 "single-scalar casts should use VPInstructionWithType");
3403 setUnderlyingValue(I);
3404 if (Mask)
3405 addOperand(Mask);
3406 }
3407
3408 ~VPReplicateRecipe() override = default;
3409
3410 /// Compute the scalar result type for a VPReplicateRecipe wrapping \p I with
3411 /// \p Operands (excluding any predicate mask).
3412 static Type *computeScalarType(const Instruction *I,
3413 ArrayRef<VPValue *> Operands);
3414
3416
3418 auto *Copy = new VPReplicateRecipe(
3419 getUnderlyingInstr(), NewOperands, IsSingleScalar,
3420 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3421 Copy->transferFlags(*this);
3422 return Copy;
3423 }
3424
3425 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3426
3427 /// Generate replicas of the desired Ingredient. Replicas will be generated
3428 /// for all parts and lanes unless a specific part and lane are specified in
3429 /// the \p State.
3430 void execute(VPTransformState &State) override;
3431
3432 /// Return the cost of this VPReplicateRecipe.
3433 InstructionCost computeCost(ElementCount VF,
3434 VPCostContext &Ctx) const override;
3435
3436 /// Return the cost of scalarizing a call to \p CalledFn with argument
3437 /// operands \p ArgOps for a given \p VF.
3438 static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy,
3440 bool IsSingleScalar, ElementCount VF,
3441 VPCostContext &Ctx);
3442
3443 bool isSingleScalar() const { return IsSingleScalar; }
3444
3445 bool isPredicated() const { return IsPredicated; }
3446
3447 /// Returns true if the recipe only uses the first lane of operand \p Op.
3448 bool usesFirstLaneOnly(const VPValue *Op) const override {
3450 "Op must be an operand of the recipe");
3451 return isSingleScalar();
3452 }
3453
3454 /// Returns true if the recipe uses scalars of operand \p Op.
3455 bool usesScalars(const VPValue *Op) const override {
3457 "Op must be an operand of the recipe");
3458 return true;
3459 }
3460
3461 /// Return the mask of a predicated VPReplicateRecipe.
3463 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3464 return getOperand(getNumOperands() - 1);
3465 }
3466
3467 /// Return the recipe's operands, excluding the mask of a predicated recipe.
3471
3472 /// Returns the number of operands, excluding the mask if the recipe is
3473 /// predicated.
3474 unsigned getNumOperandsWithoutMask() const {
3475 return getNumOperands() - isPredicated();
3476 }
3477
3478 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3479
3480protected:
3481#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3482 /// Print the recipe.
3483 void printRecipe(raw_ostream &O, const Twine &Indent,
3484 VPSlotTracker &SlotTracker) const override;
3485#endif
3486};
3487
3488/// A recipe for generating conditional branches on the bits of a mask.
3490public:
3492 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3493
3496 }
3497
3498 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3499
3500 /// Generate the extraction of the appropriate bit from the block mask and the
3501 /// conditional branch.
3502 void execute(VPTransformState &State) override;
3503
3504 /// Return the cost of this VPBranchOnMaskRecipe.
3505 InstructionCost computeCost(ElementCount VF,
3506 VPCostContext &Ctx) const override;
3507
3508#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3509 /// Print the recipe.
3510 void printRecipe(raw_ostream &O, const Twine &Indent,
3511 VPSlotTracker &SlotTracker) const override {
3512 O << Indent << "BRANCH-ON-MASK ";
3514 }
3515#endif
3516
3517 /// Returns true if the recipe uses scalars of operand \p Op.
3518 bool usesScalars(const VPValue *Op) const override {
3520 "Op must be an operand of the recipe");
3521 return true;
3522 }
3523};
3524
3525/// A recipe to combine multiple recipes into a single 'expression' recipe,
3526/// which should be considered a single entity for cost-modeling and transforms.
3527/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3528/// expression recipes, before execute. The individual expression recipes are
3529/// completely disconnected from the def-use graph of other recipes not part of
3530/// the expression. Def-use edges between pairs of expression recipes remain
3531/// intact, whereas every edge between an expression recipe and a recipe outside
3532/// the expression is elevated to connect the non-expression recipe with the
3533/// VPExpressionRecipe itself.
3534class VPExpressionRecipe : public VPSingleDefRecipe {
3535 /// Recipes included in this VPExpressionRecipe. This could contain
3536 /// duplicates.
3537 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3538
3539 /// Temporary VPValues used for external operands of the expression, i.e.
3540 /// operands not defined by recipes in the expression.
3541 SmallVector<VPValue *> LiveInPlaceholders;
3542
3543 enum class ExpressionTypes {
3544 /// Represents an inloop extended reduction operation, performing a
3545 /// reduction on an extended vector operand into a scalar value, and adding
3546 /// the result to a chain.
3547 ExtendedReduction,
3548 /// Represents an inloop extended reduction operation, which is negated,
3549 /// then reduced before adding the result to a chain.
3550 NegatedExtendedReduction,
3551 /// Represent an inloop multiply-accumulate reduction, multiplying the
3552 /// extended vector operands, performing a reduction.add on the result, and
3553 /// adding the scalar result to a chain.
3554 ExtMulAccReduction,
3555 /// Represent an inloop multiply-accumulate reduction, multiplying the
3556 /// vector operands, performing a reduction.add on the result, and adding
3557 /// the scalar result to a chain.
3558 MulAccReduction,
3559 /// Represent an inloop multiply-accumulate reduction, multiplying the
3560 /// extended vector operands, negating the multiplication, performing a
3561 /// reduction.add on the result, and adding the scalar result to a chain.
3562 ExtNegatedMulAccReduction,
3563 };
3564
3565 /// Type of the expression.
3566 ExpressionTypes ExpressionType;
3567
3568 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3569 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3570 /// in the expression) are replaced by temporary VPValues and the original
3571 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3572 /// as needed (excluding last) to ensure they are only used by other recipes
3573 /// in the expression.
3574 VPExpressionRecipe(ExpressionTypes ExpressionType,
3575 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3576
3577public:
3579 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3581 VPReductionRecipe *Red)
3582 : VPExpressionRecipe(ExpressionTypes::NegatedExtendedReduction,
3583 {Ext, Neg, Red}) {
3584 assert((Red->getRecurrenceKind() == RecurKind::Add ||
3585 Red->getRecurrenceKind() == RecurKind::FAdd ||
3586 Red->getRecurrenceKind() == RecurKind::AddChainWithSubs) &&
3587 "Expected an add or add-chain-with-subs reduction");
3588 if (Neg->getOpcode() == Instruction::Sub) {
3589 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(1));
3590 assert(SubConst && SubConst->isZero() && "Expected a negating sub");
3591 } else
3592 assert(Neg->getOpcode() == Instruction::FNeg && "Unexpected opcode");
3593 }
3595 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3598 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3599 {Ext0, Ext1, Mul, Red}) {}
3602 VPReductionRecipe *Red)
3603 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3604 {Ext0, Ext1, Mul, Neg, Red}) {
3605 assert((Mul->getOpcode() == Instruction::Mul ||
3606 Mul->getOpcode() == Instruction::FMul) &&
3607 "Expected a mul");
3608 assert((Red->getRecurrenceKind() == RecurKind::Add ||
3609 Red->getRecurrenceKind() == RecurKind::FAdd ||
3610 Red->getRecurrenceKind() == RecurKind::AddChainWithSubs) &&
3611 "Expected an add or add-chain-with-subs reduction");
3612 assert(getNumOperands() >= 3 && "Expected at least three operands");
3613 if (Neg->getOpcode() == Instruction::Sub) {
3614 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3615 assert(SubConst && SubConst->isZero() &&
3616 Neg->getOpcode() == Instruction::Sub && "Expected a negating sub");
3617 } else
3618 assert(Neg->getOpcode() == Instruction::FNeg && "Unexpected opcode");
3619 }
3620
3622 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3623 for (auto *R : reverse(ExpressionRecipes)) {
3624 if (ExpressionRecipesSeen.insert(R).second)
3625 delete R;
3626 }
3627 for (VPValue *T : LiveInPlaceholders)
3628 delete T;
3629 }
3630
3631 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3632
3633 VPExpressionRecipe *clone() override {
3634 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3635 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3636 for (auto *R : ExpressionRecipes)
3637 NewExpressiondRecipes.push_back(R->clone());
3638 for (auto *New : NewExpressiondRecipes) {
3639 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3640 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3641 // Update placeholder operands in the cloned recipe to use the external
3642 // operands, to be internalized when the cloned expression is constructed.
3643 for (const auto &[Placeholder, OutsideOp] :
3644 zip(LiveInPlaceholders, operands()))
3645 New->replaceUsesOfWith(Placeholder, OutsideOp);
3646 }
3647 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3648 }
3649
3650 /// Return the VPValue to use to infer the result type of the recipe.
3652 unsigned OpIdx =
3653 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3654 : 1;
3655 return getOperand(getNumOperands() - OpIdx);
3656 }
3657
3658 /// Insert the recipes of the expression back into the VPlan, directly before
3659 /// the current recipe. Leaves the expression recipe empty, which must be
3660 /// removed before codegen.
3661 void decompose();
3662
3663 unsigned getVFScaleFactor() const {
3664 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3665 return PR ? PR->getVFScaleFactor() : 1;
3666 }
3667
3668 /// Method for generating code, must not be called as this recipe is abstract.
3669 void execute(VPTransformState &State) override {
3670 llvm_unreachable("recipe must be removed before execute");
3671 }
3672
3674 VPCostContext &Ctx) const override;
3675
3676 /// Returns true if this expression contains recipes that may read from or
3677 /// write to memory.
3678 bool mayReadOrWriteMemory() const;
3679
3680 /// Returns true if this expression contains recipes that may have side
3681 /// effects.
3682 bool mayHaveSideEffects() const;
3683
3684 /// Returns true if this VPExpressionRecipe produces a single scalar.
3685 bool isVectorToScalar() const;
3686
3687protected:
3688#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3689 /// Print the recipe.
3690 void printRecipe(raw_ostream &O, const Twine &Indent,
3691 VPSlotTracker &SlotTracker) const override;
3692#endif
3693};
3694
3695/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3696/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3697/// order to merge values that are set under such a branch and feed their uses.
3698/// The phi nodes can be scalar or vector depending on the users of the value.
3699/// This recipe works in concert with VPBranchOnMaskRecipe.
3701public:
3702 /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs phi
3703 /// nodes after merging back from a Branch-on-Mask.
3705 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV,
3706 PredV->getScalarType(), /*UV=*/nullptr, DL) {}
3707 ~VPPredInstPHIRecipe() override = default;
3708
3710 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3711 }
3712
3713 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3714
3715 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3716 /// retain SSA form.
3717 void execute(VPTransformState &State) override;
3718
3719 /// Return the cost of this VPPredInstPHIRecipe.
3721 VPCostContext &Ctx) const override {
3722 // TODO: Compute accurate cost after retiring the legacy cost model.
3723 return 0;
3724 }
3725
3726protected:
3727#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3728 /// Print the recipe.
3729 void printRecipe(raw_ostream &O, const Twine &Indent,
3730 VPSlotTracker &SlotTracker) const override;
3731#endif
3732};
3733
3734/// A common mixin class for widening memory operations. An optional mask can be
3735/// provided as the last operand.
3737protected:
3739
3740 /// Alignment information for this memory access.
3742
3743 /// Whether the accessed addresses are consecutive.
3745
3746 /// Whether the memory access is masked.
3747 bool IsMasked = false;
3748
3749 void setMask(VPValue *Mask) {
3750 assert(!IsMasked && "cannot re-set mask");
3751 if (!Mask)
3752 return;
3753 assert(Mask->getScalarType()->isIntegerTy(1) &&
3754 "Mask must be an i1 (vector)");
3755 getAsRecipe()->addOperand(Mask);
3756 IsMasked = true;
3757 }
3758
3763
3764public:
3765 virtual ~VPWidenMemoryRecipe() = default;
3766
3767 /// Return a VPRecipeBase* to the current object.
3769 virtual const VPRecipeBase *getAsRecipe() const = 0;
3770
3771 /// Return whether the loaded-from / stored-to addresses are consecutive.
3772 bool isConsecutive() const { return Consecutive; }
3773
3774 /// Return the address accessed by this recipe.
3775 VPValue *getAddr() const { return getAsRecipe()->getOperand(0); }
3776
3777 /// Returns true if the recipe is masked.
3778 bool isMasked() const { return IsMasked; }
3779
3780 /// Return the mask used by this recipe. Note that a full mask is represented
3781 /// by a nullptr.
3782 VPValue *getMask() const {
3783 // Mask is optional and therefore the last operand.
3784 const VPRecipeBase *R = getAsRecipe();
3785 return isMasked() ? R->getOperand(R->getNumOperands() - 1) : nullptr;
3786 }
3787
3788 /// Returns the alignment of the memory access.
3789 Align getAlign() const { return Alignment; }
3790
3791 /// Return the cost of this VPWidenMemoryRecipe.
3792 InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
3793
3795};
3796
3797/// A recipe for widening load operations, using the address to load from and an
3798/// optional mask.
3800 public VPWidenMemoryRecipe {
3802 bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
3803 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadSC, {Addr}, Load.getType(),
3804 &Load, DL),
3805 VPWidenMemoryRecipe(Load, Consecutive, Metadata) {
3806 setMask(Mask);
3807 }
3808
3811 getMask(), Consecutive, *this, getDebugLoc());
3812 }
3813
3814 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3815
3816 /// Generate a wide load or gather.
3817 void execute(VPTransformState &State) override;
3818
3819 /// Return the cost of this VPWidenLoadRecipe.
3821 VPCostContext &Ctx) const override {
3822 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3823 }
3824
3825 /// Returns true if the recipe only uses the first lane of operand \p Op.
3826 bool usesFirstLaneOnly(const VPValue *Op) const override {
3828 "Op must be an operand of the recipe");
3829 // Widened, consecutive load operations only demand the first lane of
3830 // their address.
3831 return Op == getAddr() && isConsecutive();
3832 }
3833
3834protected:
3835 VPRecipeBase *getAsRecipe() override;
3836 const VPRecipeBase *getAsRecipe() const override;
3837
3838#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3839 /// Print the recipe.
3840 void printRecipe(raw_ostream &O, const Twine &Indent,
3841 VPSlotTracker &SlotTracker) const override;
3842#endif
3843};
3844
3845/// A recipe for widening load operations with vector-predication intrinsics,
3846/// using the address to load from, the explicit vector length and an optional
3847/// mask.
3849 : public VPSingleDefRecipe,
3850 public VPWidenMemoryRecipe {
3852 VPValue *Mask)
3853 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadEVLSC, {Addr, &EVL},
3854 L.getIngredient().getType(), &L.getIngredient(),
3855 L.getDebugLoc()),
3856 VPWidenMemoryRecipe(L.getIngredient(), L.isConsecutive(), L) {
3857 setMask(Mask);
3858 }
3859
3861 llvm_unreachable("cloning not supported");
3862 }
3863
3864 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3865
3866 /// Return the EVL operand.
3867 VPValue *getEVL() const { return getOperand(1); }
3868
3869 /// Generate the wide load or gather.
3870 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3871
3872 /// Return the cost of this VPWidenLoadEVLRecipe.
3874 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3875
3876 /// Returns true if the recipe only uses the first lane of operand \p Op.
3877 bool usesFirstLaneOnly(const VPValue *Op) const override {
3879 "Op must be an operand of the recipe");
3880 // Widened loads only demand the first lane of EVL and consecutive loads
3881 // only demand the first lane of their address.
3882 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3883 }
3884
3885protected:
3886 LLVM_ABI_FOR_TEST VPRecipeBase *getAsRecipe() override;
3887 LLVM_ABI_FOR_TEST const VPRecipeBase *getAsRecipe() const override;
3888
3889#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3890 /// Print the recipe.
3891 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3892 VPSlotTracker &SlotTracker) const override;
3893#endif
3894};
3895
3896/// A recipe for widening store operations, using the stored value, the address
3897/// to store to and an optional mask.
3899 public VPWidenMemoryRecipe {
3900 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3901 VPValue *Mask, bool Consecutive,
3902 const VPIRMetadata &Metadata, DebugLoc DL)
3903 : VPRecipeBase(VPRecipeBase::VPWidenStoreSC, {Addr, StoredVal}, DL),
3904 VPWidenMemoryRecipe(Store, Consecutive, Metadata) {
3905 setMask(Mask);
3906 }
3907
3911 *this, getDebugLoc());
3912 }
3913
3914 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3915
3916 /// Return the value stored by this recipe.
3917 VPValue *getStoredValue() const { return getOperand(1); }
3918
3919 /// Generate a wide store or scatter.
3920 void execute(VPTransformState &State) override;
3921
3922 /// Return the cost of this VPWidenStoreRecipe.
3924 VPCostContext &Ctx) const override {
3925 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3926 }
3927
3928 /// Returns true if the recipe only uses the first lane of operand \p Op.
3929 bool usesFirstLaneOnly(const VPValue *Op) const override {
3931 "Op must be an operand of the recipe");
3932 // Widened, consecutive stores only demand the first lane of their address,
3933 // unless the same operand is also stored.
3934 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3935 }
3936
3937protected:
3938 VPRecipeBase *getAsRecipe() override;
3939 const VPRecipeBase *getAsRecipe() const override;
3940
3941#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3942 /// Print the recipe.
3943 void printRecipe(raw_ostream &O, const Twine &Indent,
3944 VPSlotTracker &SlotTracker) const override;
3945#endif
3946};
3947
3948/// A recipe for widening store operations with vector-predication intrinsics,
3949/// using the value to store, the address to store to, the explicit vector
3950/// length and an optional mask.
3952 : public VPRecipeBase,
3953 public VPWidenMemoryRecipe {
3955 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3956 : VPRecipeBase(VPRecipeBase::VPWidenStoreEVLSC, {Addr, StoredVal, &EVL},
3957 S.getDebugLoc()),
3958 VPWidenMemoryRecipe(S.getIngredient(), S.isConsecutive(), S) {
3959 setMask(Mask);
3960 }
3961
3963 llvm_unreachable("cloning not supported");
3964 }
3965
3966 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3967
3968 /// Return the value stored by this recipe.
3969 VPValue *getStoredValue() const { return getOperand(1); }
3970
3971 /// Return the EVL operand.
3972 VPValue *getEVL() const { return getOperand(2); }
3973
3974 /// Generate the wide store or scatter.
3975 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3976
3977 /// Return the cost of this VPWidenStoreEVLRecipe.
3979 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3980
3981 /// Returns true if the recipe only uses the first lane of operand \p Op.
3982 bool usesFirstLaneOnly(const VPValue *Op) const override {
3984 "Op must be an operand of the recipe");
3985 if (Op == getEVL()) {
3986 assert(getStoredValue() != Op && "unexpected store of EVL");
3987 return true;
3988 }
3989 // Widened, consecutive memory operations only demand the first lane of
3990 // their address, unless the same operand is also stored. The latter can
3991 // happen with opaque pointers.
3992 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3993 }
3994
3995protected:
3996 LLVM_ABI_FOR_TEST VPRecipeBase *getAsRecipe() override;
3997 LLVM_ABI_FOR_TEST const VPRecipeBase *getAsRecipe() const override;
3998
3999#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4000 /// Print the recipe.
4001 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
4002 VPSlotTracker &SlotTracker) const override;
4003#endif
4004};
4005
4006/// Recipe to expand a SCEV expression.
4008 const SCEV *Expr;
4009
4010public:
4011 VPExpandSCEVRecipe(const SCEV *Expr);
4012
4013 ~VPExpandSCEVRecipe() override = default;
4014
4015 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
4016
4017 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
4018
4019 void execute(VPTransformState &State) override {
4020 llvm_unreachable("SCEV expressions must be expanded before final execute");
4021 }
4022
4023 /// Return the cost of this VPExpandSCEVRecipe.
4025 VPCostContext &Ctx) const override {
4026 // TODO: Compute accurate cost after retiring the legacy cost model.
4027 return 0;
4028 }
4029
4030 const SCEV *getSCEV() const { return Expr; }
4031
4032protected:
4033#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4034 /// Print the recipe.
4035 void printRecipe(raw_ostream &O, const Twine &Indent,
4036 VPSlotTracker &SlotTracker) const override;
4037#endif
4038};
4039
4040/// A recipe for generating the active lane mask for the vector loop that is
4041/// used to predicate the vector operations.
4043public:
4045 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
4046 StartMask, DL) {}
4047
4048 ~VPActiveLaneMaskPHIRecipe() override = default;
4049
4052 if (getNumOperands() == 2)
4053 R->addBackedgeValue(getOperand(1));
4054 return R;
4055 }
4056
4057 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
4058
4059 /// Generate the active lane mask phi of the vector loop.
4060 void execute(VPTransformState &State) override;
4061
4062protected:
4063#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4064 /// Print the recipe.
4065 void printRecipe(raw_ostream &O, const Twine &Indent,
4066 VPSlotTracker &SlotTracker) const override;
4067#endif
4068};
4069
4070/// A recipe for generating the phi node tracking the current scalar iteration
4071/// index. It starts at the start value of the canonical induction and gets
4072/// incremented by the number of scalar iterations processed by the vector loop
4073/// iteration. The increment does not have to be loop invariant.
4075public:
4077 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
4078 StartIV, DL) {}
4079
4080 ~VPCurrentIterationPHIRecipe() override = default;
4081
4083 llvm_unreachable("cloning not implemented yet");
4084 }
4085
4086 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
4087
4088 void execute(VPTransformState &State) override {
4089 llvm_unreachable("cannot execute this recipe, should be replaced by a "
4090 "scalar phi recipe");
4091 }
4092
4093 /// Return the cost of this VPCurrentIterationPHIRecipe.
4095 VPCostContext &Ctx) const override {
4096 // For now, match the behavior of the legacy cost model.
4097 return 0;
4098 }
4099
4100 /// Returns true if the recipe only uses the first lane of operand \p Op.
4101 bool usesFirstLaneOnly(const VPValue *Op) const override {
4103 "Op must be an operand of the recipe");
4104 return true;
4105 }
4106
4107protected:
4108#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4109 /// Print the recipe.
4110 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
4111 VPSlotTracker &SlotTracker) const override;
4112#endif
4113};
4114
4115/// A Recipe for widening the canonical induction variable of the vector loop.
4116/// First operand is the canonical IV recipe, a second step operand (VF * Part)
4117/// is added during unrolling.
4119public:
4121 const VPIRFlags::WrapFlagsTy &Flags = {false, false})
4122 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCanonicalIVSC, CanonicalIV,
4123 CanonicalIV->getType(), Flags) {}
4124
4125 ~VPWidenCanonicalIVRecipe() override = default;
4126
4128 auto *WideCanIV =
4130 if (VPValue *Step = getStepValue())
4131 WideCanIV->addPerPartStep(Step);
4132 return WideCanIV;
4133 }
4134
4135 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
4136
4137 void execute(VPTransformState &State) override {
4138 llvm_unreachable("Expected prior expansion of WidenCanonicalIV recipes");
4139 }
4140
4141 /// Return the cost of this VPWidenCanonicalIVRecipe.
4143 VPCostContext &Ctx) const override {
4144 // TODO: Compute accurate cost after retiring the legacy cost model.
4145 return 0;
4146 }
4147
4148 /// Return the canonical IV being widened.
4152
4154 return getNumOperands() == 2 ? getOperand(1) : nullptr;
4155 }
4156
4157 /// Add the per-part step (VF * Part) used for unrolled parts.
4159 assert(Step->getScalarType() == getScalarType() &&
4160 "per-part step must have the same type as the canonical IV");
4161 VPUser::addOperand(Step);
4162 }
4163
4164protected:
4165#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4166 /// Print the recipe.
4167 void printRecipe(raw_ostream &O, const Twine &Indent,
4168 VPSlotTracker &SlotTracker) const override;
4169#endif
4170};
4171
4172/// A recipe for converting the input value \p IV value to the corresponding
4173/// value of an IV with different start and step values, using Start + IV *
4174/// Step.
4176 /// Kind of the induction.
4178 /// If not nullptr, the floating point induction binary operator. Must be set
4179 /// for floating point inductions.
4180 const FPMathOperator *FPBinOp;
4181
4182public:
4184 const FPMathOperator *FPBinOp, VPIRValue *Start,
4185 VPValue *IV, VPValue *Step)
4186 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step},
4187 Start->getScalarType(), nullptr),
4188 Kind(Kind), FPBinOp(FPBinOp) {}
4189
4190 ~VPDerivedIVRecipe() override = default;
4191
4193 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
4194 getStepValue());
4195 }
4196
4197 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
4198
4199 void execute(VPTransformState &State) override {
4200 llvm_unreachable("Expected prior expansion of this recipe");
4201 }
4202
4203 /// Return the cost of this VPDerivedIVRecipe.
4205 VPCostContext &Ctx) const override;
4206
4208 VPValue *getIndex() const { return getOperand(1); }
4209 VPValue *getStepValue() const { return getOperand(2); }
4210 const FPMathOperator *getFPBinOp() const { return FPBinOp; }
4212
4213 /// Returns true if the recipe only uses the first lane of operand \p Op.
4214 bool usesFirstLaneOnly(const VPValue *Op) const override {
4216 "Op must be an operand of the recipe");
4217 return true;
4218 }
4219
4220protected:
4221#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4222 /// Print the recipe.
4223 void printRecipe(raw_ostream &O, const Twine &Indent,
4224 VPSlotTracker &SlotTracker) const override;
4225#endif
4226};
4227
4228/// A recipe for handling phi nodes of integer and floating-point inductions,
4229/// producing their scalar values. Before unrolling by UF the recipe represents
4230/// the VF*UF scalar values to be produced, or UF scalar values if only first
4231/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
4232/// operand StartIndex to all unroll parts except part 0, as the recipe
4233/// represents the VF scalar values (this number of values is taken from
4234/// State.VF rather than from the VF operand) starting at IV + StartIndex.
4236 Instruction::BinaryOps InductionOpcode;
4237
4238public:
4241 DebugLoc DL)
4242 : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
4243 IV->getScalarType(), FMFs, DL),
4244 InductionOpcode(Opcode) {}
4245
4247 VPValue *Step, VPValue *VF,
4250 IV, Step, VF, IndDesc.getInductionOpcode(),
4251 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
4252 ? IndDesc.getInductionBinOp()->getFastMathFlags()
4253 : FastMathFlags(),
4254 DL) {}
4255
4256 ~VPScalarIVStepsRecipe() override = default;
4257
4259 auto *NewR = new VPScalarIVStepsRecipe(
4260 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
4262 if (VPValue *StartIndex = getStartIndex())
4263 NewR->setStartIndex(StartIndex);
4264 return NewR;
4265 }
4266
4267 VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4268
4269 /// Generate the scalarized versions of the phi node as needed by their users.
4270 void execute(VPTransformState &State) override;
4271
4272 /// Return the cost of this VPScalarIVStepsRecipe.
4273 InstructionCost computeCost(ElementCount VF,
4274 VPCostContext &Ctx) const override;
4275
4276 VPValue *getStepValue() const { return getOperand(1); }
4277
4278 /// Return the number of scalars to produce per unroll part, used to compute
4279 /// StartIndex during unrolling.
4280 VPValue *getVFValue() const { return getOperand(2); }
4281
4282 /// Return the StartIndex, or null if known to be zero, valid only after
4283 /// unrolling.
4285 return getNumOperands() == 4 ? getOperand(3) : nullptr;
4286 }
4287
4288 /// Set or add the StartIndex operand.
4289 void setStartIndex(VPValue *StartIndex) {
 // A fourth operand, when present, is the StartIndex (see getStartIndex()):
 // overwrite it in place. Otherwise append StartIndex as a new operand.
4290 if (getNumOperands() == 4)
4291 setOperand(3, StartIndex);
4292 else
4293 addOperand(StartIndex);
4294 }
4295
4296 /// Returns true if the recipe only uses the first lane of operand \p Op.
4297 bool usesFirstLaneOnly(const VPValue *Op) const override {
4299 "Op must be an operand of the recipe");
4300 return true;
4301 }
4302
4303 Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4304
4305protected:
4306#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4307 /// Print the recipe.
4308 void printRecipe(raw_ostream &O, const Twine &Indent,
4309 VPSlotTracker &SlotTracker) const override;
4310#endif
4311};
4312
4313/// CastInfo helper for casting from VPRecipeBase to a mixin class that is not
4314/// part of the VPRecipeBase class hierarchy (e.g. VPPhiAccessors,
4315/// VPIRMetadata).
4316namespace vpdetail {
4317template <typename VPMixin, typename... RecipeTys>
4319 : public DefaultDoCastIfPossible<VPMixin *, VPRecipeBase *,
4320 CastInfoMixinImpl<VPMixin, RecipeTys...>> {
4321 static_assert((std::is_base_of_v<VPMixin, RecipeTys> && ...),
4322 "Each type in RecipeTys must derive from VPMixin");
4323
4324 /// Used by isa.
4325 static bool isPossible(VPRecipeBase *R) { return isa<RecipeTys...>(R); }
4326
4327 /// Used by cast.
4328 static VPMixin *doCast(VPRecipeBase *R) {
4329 VPMixin *Out = nullptr;
 // Try a dyn_cast to each type in RecipeTys in turn; the fold over || stops
 // at the first one that yields a non-null pointer, which is kept in Out.
4330 ((Out = dyn_cast<RecipeTys>(R)) || ...);
 // Reaching this point with a null Out means R is none of RecipeTys.
4331 assert(Out && "Illegal recipe for cast");
4332 return Out;
4333 }
4334 static VPMixin *castFailed() { return nullptr; }
4335};
4336} // namespace vpdetail
4337
4338/// Support casting from VPRecipeBase -> VPPhiAccessors.
4339template <>
4343
4344template <>
4349template <>
4351 : public ForwardToPointerCast<VPPhiAccessors, VPRecipeBase *,
4352 CastInfo<VPPhiAccessors, VPRecipeBase *>> {};
4353
4354/// Support casting from VPRecipeBase / VPUser -> VPWidenMemoryRecipe.
4355template <>
4360template <>
4365
4366/// Support casting from VPRecipeBase -> VPIRMetadata.
4367template <>
4373
4374template <>
4379template <>
4381 : public ForwardToPointerCast<VPIRMetadata, VPRecipeBase *,
4382 CastInfo<VPIRMetadata, VPRecipeBase *>> {};
4383
4384/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4385/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4386/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4387class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4388 friend class VPlan;
4389
4390 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4391 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4392 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4393 if (Recipe)
4394 appendRecipe(Recipe);
4395 }
4396
4397public:
4399
4400protected:
4401 /// The VPRecipes held in the order of output instructions to generate.
4403
4404 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4405 : VPBlockBase(BlockSC, Name.str()) {}
4406
4407public:
4408 ~VPBasicBlock() override {
4409 while (!Recipes.empty())
4410 Recipes.pop_back();
4411 }
4412
4413 /// Instruction iterators...
4418
4419 //===--------------------------------------------------------------------===//
4420 /// Recipe iterator methods
4421 ///
4422 inline iterator begin() { return Recipes.begin(); }
4423 inline const_iterator begin() const { return Recipes.begin(); }
4424 inline iterator end() { return Recipes.end(); }
4425 inline const_iterator end() const { return Recipes.end(); }
4426
4427 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4428 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4429 inline reverse_iterator rend() { return Recipes.rend(); }
4430 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4431
4432 inline size_t size() const { return Recipes.size(); }
4433 inline bool empty() const { return Recipes.empty(); }
4434 inline const VPRecipeBase &front() const { return Recipes.front(); }
4435 inline VPRecipeBase &front() { return Recipes.front(); }
4436 inline const VPRecipeBase &back() const { return Recipes.back(); }
4437 inline VPRecipeBase &back() { return Recipes.back(); }
4438
4439 /// Returns a reference to the list of recipes.
4441
4442 /// Returns a pointer to a member of the recipe list.
4443 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4444 return &VPBasicBlock::Recipes;
4445 }
4446
4447 /// Method to support type inquiry through isa, cast, and dyn_cast.
4448 static inline bool classof(const VPBlockBase *V) {
 // Accept both block IDs: VPIRBasicBlock derives from VPBasicBlock, so a
 // block tagged VPIRBasicBlockSC is a VPBasicBlock as well.
4449 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4450 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4451 }
4452
4453 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
 // Add \p Recipe to this block's recipe list at position \p InsertPt and
 // make this block its parent. \p Recipe must be non-null and must not
 // already have a parent block.
4454 assert(Recipe && "No recipe to append.");
4455 assert(!Recipe->Parent && "Recipe already in VPlan");
4456 Recipe->Parent = this;
4457 Recipes.insert(InsertPt, Recipe);
4458 }
4459
4460 /// Augment the existing recipes of a VPBasicBlock with an additional
4461 /// \p Recipe as the last recipe.
4462 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4463
4464 /// The method which generates the output IR instructions that correspond to
4465 /// this VPBasicBlock, thereby "executing" the VPlan.
4466 void execute(VPTransformState *State) override;
4467
4468 /// Return the cost of this VPBasicBlock.
4469 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4470
4471 /// Return the position of the first non-phi node recipe in the block.
4472 iterator getFirstNonPhi();
4473
4474 /// Returns an iterator range over the PHI-like recipes in the block.
4478
4479 /// Split current block at \p SplitAt by inserting a new block between the
4480 /// current block and its successors and moving all recipes starting at
4481 /// SplitAt to the new block. Returns the new block.
4482 VPBasicBlock *splitAt(iterator SplitAt);
4483
4484 VPRegionBlock *getEnclosingLoopRegion();
4485 const VPRegionBlock *getEnclosingLoopRegion() const;
4486
4487#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4488 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4489 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4490 ///
4491 /// Note that the numbering is applied to the whole VPlan, so printing
4492 /// individual blocks is consistent with the whole VPlan printing.
4493 void print(raw_ostream &O, const Twine &Indent,
4494 VPSlotTracker &SlotTracker) const override;
4495 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4496#endif
4497
4498 /// If the block has multiple successors, return the branch recipe terminating
4499 /// the block. If there is no successor or only a single one, return nullptr.
4500 VPRecipeBase *getTerminator();
4501 const VPRecipeBase *getTerminator() const;
4502
4503 /// Returns true if the block is exiting its parent region.
4504 bool isExiting() const;
4505
4506 /// Clone the current block and its recipes, without updating the operands of
4507 /// the cloned recipes.
4508 VPBasicBlock *clone() override;
4509
4510 /// Returns the predecessor block at index \p Idx with the predecessors as per
4511 /// the corresponding plain CFG. If the block is an entry block to a region,
4512 /// the first predecessor is the single predecessor of a region, and the
4513 /// second predecessor is the exiting block of the region.
4514 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4515
4516protected:
4517 /// Execute the recipes in the IR basic block \p BB.
4518 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4519
4520 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
4521 /// generated for this VPBB.
4522 void connectToPredecessors(VPTransformState &State);
4523
4524private:
4525 /// Create an IR BasicBlock to hold the output instructions generated by this
4526 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4527 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4528};
4529
4530inline const VPBasicBlock *
4532 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4533}
4534
4535/// A special type of VPBasicBlock that wraps an existing IR basic block.
4536/// Recipes of the block get added before the first non-phi instruction in the
4537/// wrapped block.
4538/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4539/// preheader block.
4540class VPIRBasicBlock : public VPBasicBlock {
4541 friend class VPlan;
4542
 /// The IR basic block wrapped by this VPIRBasicBlock.
4543 BasicBlock *IRBB;
4544
4545 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
 /// The block is named "ir-bb<NAME>", with NAME being the name of \p IRBB.
4546 VPIRBasicBlock(BasicBlock *IRBB)
4547 : VPBasicBlock(VPIRBasicBlockSC,
4548 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4549 IRBB(IRBB) {}
4550
4551public:
4552 ~VPIRBasicBlock() override = default;
4553
 /// Method to support type inquiry through isa, cast, and dyn_cast.
4554 static inline bool classof(const VPBlockBase *V) {
4555 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4556 }
4557
4558 /// The method which generates the output IR instructions that correspond to
4559 /// this VPBasicBlock, thereby "executing" the VPlan.
4560 void execute(VPTransformState *State) override;
4561
 /// Clone this block. NOTE(review): defined out of line; confirm whether the
 /// clone wraps the same IR basic block.
4562 VPIRBasicBlock *clone() override;
4563
 /// Return the wrapped IR basic block.
4564 BasicBlock *getIRBasicBlock() const { return IRBB; }
4565};
4566
4567/// Track information about the canonical IV value of a region.
4568/// TODO: Have it also track the canonical IV increment, subject of NUW flag.
4570 /// VPRegionValue for the canonical IV, whose allocation is managed by
4571 /// VPCanonicalIVInfo.
4572 std::unique_ptr<VPRegionValue> CanIV;
4573
4574 /// Whether the increment of the canonical IV may unsigned wrap or not.
4575 bool HasNUW = true;
4576
4577public:
4579 : CanIV(std::make_unique<VPRegionValue>(Ty, DL, Region)) {}
4580
4581 VPRegionValue *getRegionValue() { return CanIV.get(); }
4582 const VPRegionValue *getRegionValue() const { return CanIV.get(); }
4583
4584 bool hasNUW() const { return HasNUW; }
4585
4586 void clearNUW() { HasNUW = false; }
4587};
4588
4589/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4590/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4591/// A VPRegionBlock may indicate that its contents are to be replicated several
4592/// times. This is designed to support predicated scalarization, in which a
4593/// scalar if-then code structure needs to be generated VF * UF times. Having
4594/// this replication indicator helps to keep a single model for multiple
4595/// candidate VF's. The actual replication takes place only once the desired VF
4596/// and UF have been determined.
4597class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4598 friend class VPlan;
4599
4600 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4601 VPBlockBase *Entry;
4602
4603 /// Hold the Single Exiting block of the SESE region modelled by the
4604 /// VPRegionBlock.
4605 VPBlockBase *Exiting;
4606
4607 /// Holds the Canonical IV of the loop region along with additional
4608 /// information. If CanIVInfo is nullptr, the region is a replicating region.
4609 /// Loop regions retain their canonical IVs until they are dissolved, even if
4610 /// the canonical IV has no users.
4611 std::unique_ptr<VPCanonicalIVInfo> CanIVInfo;
4612
4613 /// Use VPlan::createLoopRegion() and VPlan::createReplicateRegion() to create
4614 /// VPRegionBlocks.
4615 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4616 const std::string &Name = "")
4617 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) {
 // \p Entry may be null. When it is given, \p Exiting must be given too;
 // Entry must have no predecessors, Exiting no successors, and both blocks
 // get this region as their parent. This constructor leaves CanIVInfo unset.
4618 if (Entry) {
4619 assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
4620 assert(Exiting && "Must also pass Exiting if Entry is passed.");
4621 assert(!Exiting->hasSuccessors() && "Exit block has successors.");
4622 Entry->setParent(this);
4623 Exiting->setParent(this);
4624 }
4625 }
4626
4627 VPRegionBlock(Type *CanIVTy, DebugLoc DL, VPBlockBase *Entry,
4628 VPBlockBase *Exiting, const std::string &Name = "")
4629 : VPRegionBlock(Entry, Exiting, Name) {
 // Delegate the block setup, then create the canonical IV info from
 // \p CanIVTy and \p DL. A non-null CanIVInfo is what makes isReplicator()
 // return false for this region.
4630 CanIVInfo = std::make_unique<VPCanonicalIVInfo>(CanIVTy, DL, this);
4631 }
4632
4633public:
4634 ~VPRegionBlock() override = default;
4635
4636 /// Method to support type inquiry through isa, cast, and dyn_cast.
4637 static inline bool classof(const VPBlockBase *V) {
4638 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4639 }
4640
4641 const VPBlockBase *getEntry() const { return Entry; }
4642 VPBlockBase *getEntry() { return Entry; }
4643
4644 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4645 /// EntryBlock must have no predecessors.
4646 void setEntry(VPBlockBase *EntryBlock) {
4647 assert(!EntryBlock->hasPredecessors() &&
4648 "Entry block cannot have predecessors.");
4649 Entry = EntryBlock;
4650 EntryBlock->setParent(this);
4651 }
4652
4653 const VPBlockBase *getExiting() const { return Exiting; }
4654 VPBlockBase *getExiting() { return Exiting; }
4655
4656 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4657 /// ExitingBlock must have no successors.
4658 void setExiting(VPBlockBase *ExitingBlock) {
4659 assert(!ExitingBlock->hasSuccessors() &&
4660 "Exit block cannot have successors.");
4661 Exiting = ExitingBlock;
4662 ExitingBlock->setParent(this);
4663 }
4664
4665 /// Returns the pre-header VPBasicBlock of the loop region.
4667 assert(!isReplicator() && "should only get pre-header of loop regions");
4668 return getSinglePredecessor()->getExitingBasicBlock();
4669 }
4670
4671 /// An indicator whether this region is to generate multiple replicated
4672 /// instances of output IR corresponding to its VPBlockBases.
4673 bool isReplicator() const { return !CanIVInfo; }
4674
4675 /// The method which generates the output IR instructions that correspond to
4676 /// this VPRegionBlock, thereby "executing" the VPlan.
4677 void execute(VPTransformState *State) override;
4678
4679 // Return the cost of this region.
4680 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4681
4682#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4683 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4684 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4685 /// consecutive numbers.
4686 ///
4687 /// Note that the numbering is applied to the whole VPlan, so printing
4688 /// individual regions is consistent with the whole VPlan printing.
4689 void print(raw_ostream &O, const Twine &Indent,
4690 VPSlotTracker &SlotTracker) const override;
4691 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4692#endif
4693
4694 /// Clone all blocks in the single-entry single-exit region of the block and
4695 /// their recipes without updating the operands of the cloned recipes.
4696 VPRegionBlock *clone() override;
4697
4698 /// Remove the current region from its VPlan, connecting its predecessor to
4699 /// its entry, and its exiting block to its successor.
4700 void dissolveToCFGLoop();
4701
4702 /// Get the canonical IV increment instruction if it exists. Otherwise, create
4703 /// a new increment before the terminator and return it. The canonical IV
4704 /// increment is subject to DCE if unused, unlike the canonical IV itself.
4705 VPInstruction *getOrCreateCanonicalIVIncrement();
4706
4707 /// Return the canonical induction variable of the region, null for
4708 /// replicating regions.
4710 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4711 }
4713 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4714 }
4715
4716 /// Return the type of the canonical IV for loop regions.
4718 return CanIVInfo->getRegionValue()->getType();
4719 }
4720
4721 /// Indicates if NUW is set for the canonical IV increment, for loop regions.
4722 bool hasCanonicalIVNUW() const { return CanIVInfo->hasNUW(); }
4723
4724 /// Unsets NUW for the canonical IV increment \p Increment, for loop regions.
4726 assert(Increment && "Must provide increment to clear");
4727 Increment->dropPoisonGeneratingFlags();
4728 CanIVInfo->clearNUW();
4729 }
4730};
4731
4733 return getParent()->getParent();
4734}
4735
4737 return getParent()->getParent();
4738}
4739
4740/// VPlan models a candidate for vectorization, encoding various decisions taken
4741/// to produce efficient output IR, including which branches, basic-blocks and
4742/// output IR instructions to generate, and their cost. VPlan holds a
4743/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4744/// VPBasicBlock.
4745class VPlan {
4746 friend class VPlanPrinter;
4747 friend class VPSlotTracker;
4748
4749 /// VPBasicBlock corresponding to the original preheader. Used to place
4750 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4751 /// rest of VPlan execution.
4752 /// When this VPlan is used for the epilogue vector loop, the entry will be
4753 /// replaced by a new entry block created during skeleton creation.
4754 VPBasicBlock *Entry;
4755
4756 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4757 VPIRBasicBlock *ScalarHeader;
4758
4759 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4760 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4761 /// e.g. if the scalar epilogue always executes.
4763
4764 /// Holds the VFs applicable to this VPlan.
4766
4767 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4768 /// any UF.
4770
4771 /// Holds the name of the VPlan, for printing.
4772 std::string Name;
4773
4774 /// Represents the trip count of the original loop, for folding
4775 /// the tail.
4776 VPValue *TripCount = nullptr;
4777
4778 /// Represents the backedge taken count of the original loop, for folding
4779 /// the tail. It equals TripCount - 1.
4780 VPSymbolicValue *BackedgeTakenCount = nullptr;
4781
4782 /// Represents the vector trip count.
4783 VPSymbolicValue VectorTripCount;
4784
4785 /// Represents the vectorization factor of the loop.
4786 VPSymbolicValue VF;
4787
4788 /// Represents the unroll factor of the loop.
4789 VPSymbolicValue UF;
4790
4791 /// Represents the loop-invariant VF * UF of the vector loop region.
4792 VPSymbolicValue VFxUF;
4793
4794 /// Contains all the external definitions created for this VPlan, as a mapping
4795 /// from IR Values to VPIRValues.
4797
4798 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4799 /// VPlan is destroyed.
4800 SmallVector<VPBlockBase *> CreatedBlocks;
4801
4802 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4803 /// wrapping the original header of the scalar loop. The vector loop will have
4804 /// index type \p IdxTy.
4805 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader, Type *IdxTy)
4806 : Entry(Entry), ScalarHeader(ScalarHeader), VectorTripCount(IdxTy),
4807 VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4808 Entry->setPlan(this);
4809 assert(ScalarHeader->getNumSuccessors() == 0 &&
4810 "scalar header must be a leaf node");
4811 }
4812
4813public:
4814 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4815 /// original preheader and scalar header of \p L, to be used as entry and
4816 /// scalar header blocks of the new VPlan. The vector loop will have index
4817 /// type \p IdxTy.
4818 VPlan(Loop *L, Type *IdxTy);
4819
4820 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4821 /// wrapping \p ScalarHeaderBB and vector loop index of type \p IdxTy.
4822 VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
4823 : VectorTripCount(IdxTy), VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4824 setEntry(createVPBasicBlock("preheader"));
4825 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4826 }
4827
4829
4831 Entry = VPBB;
4832 VPBB->setPlan(this);
4833 }
4834
4835 /// Generate the IR code for this VPlan.
4836 void execute(VPTransformState *State);
4837
4838 /// Return the cost of this plan.
4840
4841 VPBasicBlock *getEntry() { return Entry; }
4842 const VPBasicBlock *getEntry() const { return Entry; }
4843
4844 /// Returns the preheader of the vector loop region, if one exists, or null
4845 /// otherwise.
4847 const VPRegionBlock *VectorRegion = getVectorLoopRegion();
4848 return VectorRegion
4849 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4850 : nullptr;
4851 }
4852
4853 /// Returns the VPRegionBlock of the vector loop.
4856
4857 /// Returns true if this VPlan is for an outer loop, i.e., its vector
4858 /// loop region contains a nested loop region.
4859 LLVM_ABI_FOR_TEST bool isOuterLoop() const;
4860
4861 /// Returns the 'middle' block of the plan, that is the block that selects
4862 /// whether to execute the scalar tail loop or the exit block from the loop
4863 /// latch. If there is an early exit from the vector loop, the middle block
4864 /// conceptually has the early exit block as third successor, split across 2
4865 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4866 /// tail loop or the exit block. If the scalar tail loop or exit block are
4867 /// known to always execute, the middle block may branch directly to that
4868 /// block. This function cannot be called once the vector loop region has been
4869 /// removed.
4871 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4872 assert(
4873 LoopRegion &&
4874 "cannot call the function after vector loop region has been removed");
4875 // The middle block is always the last successor of the region.
4876 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4877 }
4878
4880 return const_cast<VPlan *>(this)->getMiddleBlock();
4881 }
4882
4883 /// Return the VPBasicBlock for the preheader of the scalar loop.
4886 getScalarHeader()->getSinglePredecessor());
4887 }
4888
4889 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4890 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4891
4892 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4893 /// the original scalar loop.
4894 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4895
4896 /// Returns true if \p VPBB is an exit block.
4897 bool isExitBlock(VPBlockBase *VPBB);
4898
4899 /// The trip count of the original loop.
4901 assert(TripCount && "trip count needs to be set before accessing it");
4902 return TripCount;
4903 }
4904
4905 /// Set the trip count assuming it is currently null; if it is not - use
4906 /// resetTripCount().
4907 void setTripCount(VPValue *NewTripCount) {
4908 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4909 TripCount = NewTripCount;
4910 }
4911
4912 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4913 /// the original trip count have been replaced.
4914 void resetTripCount(VPValue *NewTripCount) {
4915 assert(TripCount && NewTripCount && TripCount->user_empty() &&
4916 "TripCount must be set when resetting");
4917 TripCount = NewTripCount;
4918 }
4919
4920 /// The backedge taken count of the original loop.
4922 // BTC shares the canonical IV type with VectorTripCount.
4923 if (!BackedgeTakenCount)
4924 BackedgeTakenCount = new VPSymbolicValue(VectorTripCount.getType());
4925 return BackedgeTakenCount;
4926 }
4927 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4928
4929 /// The vector trip count.
4930 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4931
4932 /// Returns the VF of the vector loop region.
4933 VPSymbolicValue &getVF() { return VF; };
4934 const VPSymbolicValue &getVF() const { return VF; };
4935
4936 /// Returns the UF of the vector loop region.
4937 VPSymbolicValue &getUF() { return UF; };
4938
4939 /// Returns VF * UF of the vector loop region.
4940 VPSymbolicValue &getVFxUF() { return VFxUF; }
4941
4944 }
4945
4946 const DataLayout &getDataLayout() const {
4948 }
4949
4950 void addVF(ElementCount VF) { VFs.insert(VF); }
4951
4953 assert(hasVF(VF) && "Cannot set VF not already in plan");
4954 VFs.clear();
4955 VFs.insert(VF);
4956 }
4957
4958 /// Remove \p VF from the plan.
4960 assert(hasVF(VF) && "tried to remove VF not present in plan");
4961 VFs.remove(VF);
4962 }
4963
4964 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4965 bool hasScalableVF() const {
4966 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4967 }
4968
4969 /// Returns an iterator range over all VFs of the plan.
4972 return VFs;
4973 }
4974
4975 /// Returns the single VF of the plan, asserting that the plan has exactly
4976 /// one VF.
4978 assert(VFs.size() == 1 && "expected plan with single VF");
4979 return VFs[0];
4980 }
4981
4982 bool hasScalarVFOnly() const {
4983 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4984 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4985 "Plan with scalar VF should only have a single VF");
4986 return HasScalarVFOnly;
4987 }
4988
4989 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4990
4991 /// Returns the concrete UF of the plan, after unrolling.
4992 unsigned getConcreteUF() const {
4993 assert(UFs.size() == 1 && "Expected a single UF");
4994 return UFs[0];
4995 }
4996
4997 void setUF(unsigned UF) {
4998 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4999 UFs.clear();
5000 UFs.insert(UF);
5001 }
5002
5003 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
5004 /// concrete UF.
5005 bool isUnrolled() const { return UFs.size() == 1; }
5006
5007 /// Return a string with the name of the plan and the applicable VFs and UFs.
5008 std::string getName() const;
5009
5010 void setName(const Twine &newName) { Name = newName.str(); }
5011
5012 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
5013 /// yet) for \p V.
5015 assert(V && "Trying to get or add the VPIRValue of a null Value");
5016 auto [It, Inserted] = LiveIns.try_emplace(V);
5017 if (Inserted) {
5018 if (auto *CI = dyn_cast<ConstantInt>(V))
5019 It->second = new VPConstantInt(CI);
5020 else
5021 It->second = new VPIRValue(V);
5022 }
5023
5024 assert(isa<VPIRValue>(It->second) &&
5025 "Only VPIRValues should be in mapping");
5026 return It->second;
5027 }
5029 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
5030 return getOrAddLiveIn(V->getValue());
5031 }
5032
5033 /// Return a VPIRValue wrapping i1 true.
5034 VPIRValue *getTrue() { return getConstantInt(1, 1); }
5035
5036 /// Return a VPIRValue wrapping i1 false.
5037 VPIRValue *getFalse() { return getConstantInt(1, 0); }
5038
5039 /// Return a VPIRValue wrapping the null value of type \p Ty.
5040 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
5041
5042 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
5044 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
5045 }
5046
5047 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
5048 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
5049 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
5050 }
5051
5052 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
5053 /// value.
5055 bool IsSigned = false) {
5056 return getConstantInt(APInt(BitWidth, Val, IsSigned));
5057 }
5058
5059 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
5061 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
5062 }
5063
5064 /// Return a VPIRValue wrapping a poison value of type \p Ty.
5066 return getOrAddLiveIn(PoisonValue::get(Ty));
5067 }
5068
5069 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
5070 /// otherwise.
5071 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
5072
5073 /// Return the list of live-in VPValues available in the VPlan.
5074 auto getLiveIns() const { return LiveIns.values(); }
5075
5076#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5077 /// Print the live-ins of this VPlan to \p O.
5078 void printLiveIns(raw_ostream &O) const;
5079
5080 /// Print this VPlan to \p O.
5081 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
5082
5083 /// Print this VPlan in DOT format to \p O.
5084 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
5085
5086 /// Dump the plan to stderr (for debugging).
5087 LLVM_DUMP_METHOD void dump() const;
5088#endif
5089
5090 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
5091 /// recipes to refer to the clones, and return it.
5093
5094 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
5095 /// present. The returned block is owned by the VPlan and deleted once the
5096 /// VPlan is destroyed.
5098 VPRecipeBase *Recipe = nullptr) {
5099 auto *VPB = new VPBasicBlock(Name, Recipe);
5100 CreatedBlocks.push_back(VPB);
5101 return VPB;
5102 }
5103
5104 /// Create a new loop region with a canonical IV using \p CanIVTy and
5105 /// \p DL. Use \p Name as the region's name and set entry and exiting blocks
5106 /// to \p Entry and \p Exiting respectively, if provided. The returned block
5107 /// is owned by the VPlan and deleted once the VPlan is destroyed.
5109 const std::string &Name = "",
5110 VPBlockBase *Entry = nullptr,
5111 VPBlockBase *Exiting = nullptr) {
5112 auto *VPB = new VPRegionBlock(CanIVTy, DL, Entry, Exiting, Name);
5113 CreatedBlocks.push_back(VPB);
5114 return VPB;
5115 }
5116
5117 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
5118 /// returned block is owned by the VPlan and deleted once the VPlan is
5119 /// destroyed.
5121 const std::string &Name = "") {
5122 auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
5123 CreatedBlocks.push_back(VPB);
5124 return VPB;
5125 }
5126
5127 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
5128 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
5129 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
5131
5132 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
5133 /// instructions in \p IRBB, except its terminator which is managed by the
5134 /// successors of the block in VPlan. The returned block is owned by the VPlan
5135 /// and deleted once the VPlan is destroyed.
5137
5138 /// Returns true if the VPlan is based on a loop with an early exit. That is
5139 /// the case if the VPlan has either more than one exit block or a single exit
5140 /// block with multiple predecessors (one for the exit via the latch and one
5141 /// via the other early exit).
5142 bool hasEarlyExit() const {
5143 return count_if(ExitBlocks,
5144 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
5145 1 ||
5146 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
5147 }
5148
5149 /// Returns true if the scalar tail may execute after the vector loop, i.e.
5150 /// if the middle block is a predecessor of the scalar preheader. Note that
5151 /// this relies on unneeded branches to the scalar tail loop being removed.
5152 bool hasScalarTail() const {
5153 auto *ScalarPH = getScalarPreheader();
5154 return ScalarPH &&
5155 is_contained(ScalarPH->getPredecessors(), getMiddleBlock());
5156 }
5157
5158 /// The type of the canonical induction variable of the vector loop.
5159 Type *getIndexType() const { return VF.getType(); }
5160};
5161
5162#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5163inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
5164 Plan.print(OS);
5165 return OS;
5166}
5167#endif
5168
5169} // end namespace llvm
5170
5171#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ABI
Definition Compiler.h:215
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:663
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:220
#define LLVM_PACKED_START
Definition Compiler.h:556
dxil translate DXIL Translate Metadata
Hexagon Common GEP
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
static Interval intersect(const Interval &I1, const Interval &I2)
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:585
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
Get the last element.
Definition ArrayRef.h:150
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:126
static DebugLoc getUnknown()
Definition DebugLoc.h:153
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:202
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool isCast() const
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1069
Root of the metadata hierarchy.
Definition Metadata.h:64
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an assumption made using SCEV expressions which can be checked at run-time.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4050
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:4044
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4387
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4415
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4462
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4417
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4414
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4440
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4398
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4404
iterator end()
Definition VPlan.h:4424
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4422
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4416
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4475
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:763
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:266
~VPBasicBlock() override
Definition VPlan.h:4408
const_reverse_iterator rbegin() const
Definition VPlan.h:4428
reverse_iterator rend()
Definition VPlan.h:4429
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4402
VPRecipeBase & back()
Definition VPlan.h:4437
const VPRecipeBase & front() const
Definition VPlan.h:4434
const_iterator begin() const
Definition VPlan.h:4423
VPRecipeBase & front()
Definition VPlan.h:4435
const VPRecipeBase & back() const
Definition VPlan.h:4436
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4453
bool empty() const
Definition VPlan.h:4433
const_iterator end() const
Definition VPlan.h:4425
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4448
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4443
reverse_iterator rbegin()
Definition VPlan.h:4427
friend class VPlan
Definition VPlan.h:4388
size_t size() const
Definition VPlan.h:4432
const_reverse_iterator rend() const
Definition VPlan.h:4430
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2989
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2994
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2950
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2984
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3006
VPBlendRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:2971
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2969
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:3000
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2980
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:94
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:315
VPRegionBlock * getParent()
Definition VPlan.h:186
VPBlocksTy & getPredecessors()
Definition VPlan.h:223
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:220
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:385
void setName(const Twine &newName)
Definition VPlan.h:179
size_t getNumSuccessors() const
Definition VPlan.h:237
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:219
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:217
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:337
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:661
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:173
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:273
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:350
size_t getNumPredecessors() const
Definition VPlan.h:238
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:306
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:258
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:343
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:215
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:222
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:171
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:230
const VPRegionBlock * getParent() const
Definition VPlan.h:187
const std::string & getName() const
Definition VPlan.h:177
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:325
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:263
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:297
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:233
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:257
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:322
friend class VPBlockUtils
Definition VPlan.h:95
unsigned getVPBlockID() const
Definition VPlan.h:184
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:364
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:329
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:163
VPBlocksTy & getSuccessors()
Definition VPlan.h:212
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:250
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:286
void setParent(VPRegionBlock *P)
Definition VPlan.h:197
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:279
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:227
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:211
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3510
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3494
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3518
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3491
VPlan-based builder utility analogous to IRBuilder.
VPRegionValue * getRegionValue()
Definition VPlan.h:4581
VPCanonicalIVInfo(Type *Ty, DebugLoc DL, VPRegionBlock *Region)
Definition VPlan.h:4578
const VPRegionValue * getRegionValue() const
Definition VPlan.h:4582
bool hasNUW() const
Definition VPlan.h:4584
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4082
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:4076
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:4094
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:4088
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4101
~VPCurrentIterationPHIRecipe() override=default
InductionDescriptor::InductionKind getInductionKind() const
Definition VPlan.h:4211
VPValue * getIndex() const
Definition VPlan.h:4208
const FPMathOperator * getFPBinOp() const
Definition VPlan.h:4210
VPIRValue * getStartValue() const
Definition VPlan.h:4207
VPValue * getStepValue() const
Definition VPlan.h:4209
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4199
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4192
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step)
Definition VPlan.h:4183
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4214
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4019
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:4024
VPExpandSCEVRecipe(const SCEV *Expr)
const SCEV * getSCEV() const
Definition VPlan.h:4030
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4015
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3669
bool isVectorToScalar() const
Returns true if this VPExpressionRecipe produces a single scalar.
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3651
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPWidenRecipe *Neg, VPReductionRecipe *Red)
Definition VPlan.h:3580
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3633
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3621
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3578
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3596
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Neg, VPReductionRecipe *Red)
Definition VPlan.h:3600
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3663
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3594
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2436
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2438
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2447
void addBackedgeValue(VPValue *V)
Add V as the incoming value from the loop backedge.
Definition VPlan.h:2491
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2460
static bool classof(const VPValue *V)
Definition VPlan.h:2457
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2483
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, Type *ResultTy, DebugLoc DL)
Definition VPlan.h:2443
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2488
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2472
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2480
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2453
VPValue * getStartValue() const
Definition VPlan.h:2475
void execute(VPTransformState &State) override=0
Generate the phi nodes.
~VPHeaderPHIRecipe() override=default
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
Definition VPlan.h:2163
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2176
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2193
unsigned getOpcode() const
Definition VPlan.h:2189
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2168
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4540
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:473
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4564
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4554
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4541
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:498
Class to record and manage LLVM IR flags.
Definition VPlan.h:695
FastMathFlagsTy FMFs
Definition VPlan.h:783
ReductionFlagsTy ReductionFlags
Definition VPlan.h:785
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
bool hasNoWrapFlags() const
Definition VPlan.h:1038
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:876
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:856
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:842
WrapFlagsTy WrapFlags
Definition VPlan.h:777
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:835
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:1000
bool isReductionOrdered() const
Definition VPlan.h:1064
TruncFlagsTy TruncFlags
Definition VPlan.h:778
CmpInst::Predicate getPredicate() const
Definition VPlan.h:972
WrapFlagsTy getNoWrapFlags() const
Definition VPlan.h:1048
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlagsOrNone() const
uint8_t AllFlags[2]
Definition VPlan.h:786
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:1008
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:881
ExactFlagsTy ExactFlags
Definition VPlan.h:780
bool hasNoSignedWrap() const
Definition VPlan.h:1027
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1052
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:847
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:852
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:861
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:830
uint8_t GEPFlagsStorage
Definition VPlan.h:781
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:866
bool isNonNeg() const
Definition VPlan.h:1010
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:990
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:995
DisjointFlagsTy DisjointFlags
Definition VPlan.h:779
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:980
bool hasNoUnsignedWrap() const
Definition VPlan.h:1016
FCmpFlagsTy FCmpFlags
Definition VPlan.h:784
NonNegFlagsTy NonNegFlags
Definition VPlan.h:782
bool isReductionInLoop() const
Definition VPlan.h:1070
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:892
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:929
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:871
uint8_t CmpPredStorage
Definition VPlan.h:776
RecurKind getRecurKind() const
Definition VPlan.h:1058
VPIRFlags(Instruction &I)
Definition VPlan.h:792
Instruction & getInstruction() const
Definition VPlan.h:1752
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1760
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1739
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1766
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1754
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1727
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1171
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1207
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1179
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
VPIRMetadata()=default
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1191
static bool classof(const VPUser *R)
Definition VPlan.h:1570
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1550
Type * getResultType() const
Definition VPlan.h:1588
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1574
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Value *UV=nullptr)
Definition VPlan.h:1541
void execute(VPTransformState &State) override
Generate the instruction.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
bool usesScalars(const VPValue *Op) const override
Cast recipes always use scalars of their operand.
Definition VPlan.h:1591
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1226
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1473
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1495
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1404
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1328
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1319
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1332
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1344
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1322
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1269
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1315
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1264
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1261
@ VScale
Returns the value for vscale.
Definition VPlan.h:1348
@ CanonicalIVIncrementForPart
Definition VPlan.h:1245
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1272
bool hasResult() const
Definition VPlan.h:1438
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1498
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1478
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1520
unsigned getOpcode() const
Definition VPlan.h:1417
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1523
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1489
VPInstruction * cloneWithOperands(ArrayRef< VPValue * > NewOperands, Type *ResultTy=nullptr)
Definition VPlan.h:1408
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1463
A common base class for interleaved memory operations.
Definition VPlan.h:3031
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:3094
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3100
static bool classof(const VPUser *U)
Definition VPlan.h:3076
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:3043
Instruction * getInsertPos() const
Definition VPlan.h:3098
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3071
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:3096
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3088
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:3117
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3082
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:3197
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3191
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3204
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3184
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3171
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:3127
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3154
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3137
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3148
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:3129
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
A VPRecipeValue defined by a multi-def recipe, stores a pointer to it.
Definition VPlanValue.h:365
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1607
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1636
void addIncoming(VPValue *IncomingV)
Append IncomingV as an incoming value to the phi-like recipe.
Definition VPlan.h:1665
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1631
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4531
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1656
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1616
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1641
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1645
~VPPredInstPHIRecipe() override=default
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3709
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3720
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs phi nodes after merging back from ...
Definition VPlan.h:3704
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:402
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:550
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4732
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenMemIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCurrentIterationPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:420
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:558
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:477
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:555
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:526
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:404
const VPBasicBlock * getParent() const
Definition VPlan.h:478
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:531
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:523
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:467
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
Definition VPlanValue.h:338
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3363
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3342
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3366
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3353
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true if the phi is part of an ordered reduction.
Definition VPlan.h:2911
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2902
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2884
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2895
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true if the phi is part of a multi-use reduction.
Definition VPlan.h:2923
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2865
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2914
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2928
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPReductionPHIRecipe * cloneWithOperands(VPValue *Start, VPValue *BackedgeValue)
Definition VPlan.h:2877
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2920
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2908
A recipe to represent in-loop, ordered or partial reduction operations.
Definition VPlan.h:3220
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3229
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3305
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3274
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3289
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3316
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3318
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3301
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3254
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3303
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3260
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3307
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3314
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3309
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3268
static bool classof(const VPUser *U)
Definition VPlan.h:3279
static bool classof(const VPValue *VPV)
Definition VPlan.h:3284
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3323
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4597
const VPBlockBase * getEntry() const
Definition VPlan.h:4641
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4673
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4658
VPBlockBase * getExiting()
Definition VPlan.h:4654
const VPRegionValue * getCanonicalIV() const
Definition VPlan.h:4712
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4646
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4717
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4722
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
Definition VPlan.h:4725
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4709
const VPBlockBase * getExiting() const
Definition VPlan.h:4653
VPBlockBase * getEntry()
Definition VPlan.h:4642
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4666
friend class VPlan
Definition VPlan.h:4598
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4637
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:216
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3385
bool isSingleScalar() const
Definition VPlan.h:3443
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the recipe is predicated.
Definition VPlan.h:3474
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3393
~VPReplicateRecipe() override=default
static Type * computeScalarType(const Instruction *I, ArrayRef< VPValue * > Operands)
Compute the scalar result type for a VPReplicateRecipe wrapping I with Operands (excluding any predic...
VPReplicateRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:3417
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3455
operand_range operandsWithoutMask()
Return the recipe's operands, excluding the mask of a predicated recipe.
Definition VPlan.h:3468
bool isPredicated() const
Definition VPlan.h:3445
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3415
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3448
unsigned getOpcode() const
Definition VPlan.h:3478
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3462
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4303
VPValue * getStepValue() const
Definition VPlan.h:4276
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:4246
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4289
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4258
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4284
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4280
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:4239
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4297
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:609
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:615
static bool classof(const VPValue *V)
Definition VPlan.h:667
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:680
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:624
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:683
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Type *ResultTy, Value *UV=nullptr, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:619
static bool classof(const VPUser *U)
Definition VPlan.h:672
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:611
LLVM_ABI_FOR_TEST VPSingleDefValue(VPSingleDefRecipe *Def, Value *UV=nullptr, Type *Ty=nullptr)
Construct a VPSingleDefValue. Must only be used by VPSingleDefRecipe.
Definition VPlan.cpp:169
This class can be used to assign names to VPValues.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:385
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1505
operand_range operands()
Definition VPlanValue.h:458
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:431
unsigned getNumOperands() const
Definition VPlanValue.h:425
operand_iterator op_end()
Definition VPlanValue.h:456
operand_iterator op_begin()
Definition VPlanValue.h:454
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:426
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:406
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:452
virtual bool usesScalars(const VPValue *Op) const
Returns true if the VPUser uses scalars of operand Op.
Definition VPlanValue.h:465
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:451
void addOperand(VPValue *Operand)
Definition VPlanValue.h:411
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Definition VPlan.cpp:149
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:143
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:130
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:75
bool user_empty() const
Definition VPlanValue.h:161
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:209
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2306
VPValue * getVFValue() const
Definition VPlan.h:2287
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2284
int64_t getStride() const
Definition VPlan.h:2285
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2327
VPValue * getOffset() const
Definition VPlan.h:2288
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2320
void addOffset(VPValue *Offset)
Append Offset as the offset operand.
Definition VPlan.h:2298
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2274
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:2313
VPValue * getPointer() const
Definition VPlan.h:2286
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
void addPerPartOffset(VPValue *VFxPart)
Add the per-part offset (VFxPart) used for unrolled parts > 0.
Definition VPlan.h:2368
VPValue * getStride() const
Definition VPlan.h:2361
Type * getSourceElementType() const
Definition VPlan.h:2376
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2378
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2385
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2352
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2402
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2392
VPValue * getVFxPart() const
Definition VPlan.h:2363
A recipe for widening Call instructions using library calls.
Definition VPlan.h:2097
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:2104
const_operand_range args() const
Definition VPlan.h:2145
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2123
operand_range args()
Definition VPlan.h:2144
Function * getCalledScalarFunction() const
Definition VPlan.h:2140
~VPWidenCallRecipe() override=default
VPWidenCanonicalIVRecipe(VPRegionValue *CanonicalIV, const VPIRFlags::WrapFlagsTy &Flags={false, false})
Definition VPlan.h:4120
~VPWidenCanonicalIVRecipe() override=default
VPValue * getStepValue() const
Definition VPlan.h:4153
void addPerPartStep(VPValue *Step)
Add the per-part step (VF * Part) used for unrolled parts.
Definition VPlan.h:4158
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:4142
VPRegionValue * getCanonicalIV() const
Return the canonical IV being widened.
Definition VPlan.h:4149
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4127
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4137
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1878
Instruction::CastOps getOpcode() const
Definition VPlan.h:1914
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1883
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1899
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2236
Type * getSourceElementType() const
Definition VPlan.h:2241
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2244
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2227
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(Type *SourceElementTy, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), GetElementPtrInst *UV=nullptr)
Definition VPlan.h:2210
void execute(VPTransformState &State) override=0
Generate the phi nodes.
ArrayRef< const SCEVPredicate * > getNoWrapPredicates() const
Returns the SCEV predicates associated with this induction.
Definition VPlan.h:2584
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2596
static bool classof(const VPValue *V)
Definition VPlan.h:2546
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2565
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2588
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2558
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2573
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2576
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2514
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2561
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, Type *ResultTy, DebugLoc DL)
Definition VPlan.h:2520
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2581
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2541
const VPValue * getVFValue() const
Definition VPlan.h:2568
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2551
const VPValue * getStepValue() const
Definition VPlan.h:2562
void addUnrolledPartOperands(VPValue *SplatVFStep, VPValue *LastPart)
After unrolling, append the splat-VF step (VF * step) and the value of the induction at the last unro...
Definition VPlan.h:2529
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2657
const TruncInst * getTruncInst() const
Definition VPlan.h:2673
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2651
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2661
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2643
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2617
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2672
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2626
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2683
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2668
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1925
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1975
CallInst * createVectorCall(VPTransformState &State)
Helper function to produce the widened intrinsic call.
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:2029
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:2035
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1961
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:2041
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2011
static bool classof(const VPValue *V)
Definition VPlan.h:2006
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1986
VPWidenIntrinsicRecipe(const unsigned char SC, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1939
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:2038
~VPWidenIntrinsicRecipe() override=default
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1996
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
static bool classof(const VPUser *U)
Definition VPlan.h:2001
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
void execute(VPTransformState &State) override
Produce a widened version of the vector memory intrinsic.
~VPWidenMemIntrinsicRecipe() override=default
VPWidenMemIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2074
VPWidenMemIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, Align Alignment, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2060
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector memory intrinsic.
A common mixin class for widening memory operations.
Definition VPlan.h:3736
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3747
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3772
virtual ~VPWidenMemoryRecipe()=default
Instruction & Ingredient
Definition VPlan.h:3738
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition VPlan.h:3794
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3744
virtual const VPRecipeBase * getAsRecipe() const =0
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3782
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3741
VPWidenMemoryRecipe(Instruction &I, bool Consecutive, const VPIRMetadata &Metadata)
Definition VPlan.h:3759
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3778
void setMask(VPValue *Mask)
Definition VPlan.h:3749
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3789
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3775
A recipe for widened phis.
Definition VPlan.h:2741
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2783
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2761
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
VPWidenPHIRecipe(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe with incoming values IncomingValues, debug location DL and Name.
Definition VPlan.h:2748
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2710
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2719
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2700
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1817
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1838
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1867
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1821
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1828
~VPWidenRecipe() override=default
VPWidenRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:1840
unsigned getOpcode() const
Definition VPlan.h:1857
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4745
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:5071
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1162
friend class VPSlotTracker
Definition VPlan.h:4747
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1138
bool hasVF(ElementCount VF) const
Definition VPlan.h:4964
ElementCount getSingleVF() const
Returns the single VF of the plan, asserting that the plan has exactly one VF.
Definition VPlan.h:4977
const DataLayout & getDataLayout() const
Definition VPlan.h:4946
LLVMContext & getContext() const
Definition VPlan.h:4942
VPBasicBlock * getEntry()
Definition VPlan.h:4841
Type * getIndexType() const
The type of the canonical induction variable of the vector loop.
Definition VPlan.h:5159
void setName(const Twine &newName)
Definition VPlan.h:5010
bool hasScalableVF() const
Definition VPlan.h:4965
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4900
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4921
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4971
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:885
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:5028
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:902
const VPBasicBlock * getEntry() const
Definition VPlan.h:4842
friend class VPlanPrinter
Definition VPlan.h:4746
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:5037
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:5060
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4940
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:5043
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:5120
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1297
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:5074
bool hasUF(unsigned UF) const
Definition VPlan.h:4989
VPIRValue * getPoison(Type *Ty)
Return a VPIRValue wrapping a poison value of type Ty.
Definition VPlan.h:5065
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4894
VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and vect...
Definition VPlan.h:4822
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4930
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4927
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:5014
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:5108
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:5040
void setVF(ElementCount VF)
Definition VPlan.h:4952
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:5005
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1053
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:5142
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1035
LLVM_ABI_FOR_TEST bool isOuterLoop() const
Returns true if this VPlan is for an outer loop, i.e., its vector loop region contains a nested loop ...
Definition VPlan.cpp:1068
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4992
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:5054
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4879
void setTripCount(VPValue *NewTripCount)
Set the trip count, assuming it is currently null; if it is not, use resetTripCount() instead.
Definition VPlan.h:4907
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4914
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4870
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4830
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:5097
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1303
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4959
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:5034
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4846
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1168
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4937
bool hasScalarVFOnly() const
Definition VPlan.h:4982
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4884
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:912
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1121
void addVF(ElementCount VF)
Definition VPlan.h:4950
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4890
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1077
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4933
void setUF(unsigned UF)
Definition VPlan.h:4997
const VPSymbolicValue & getVF() const
Definition VPlan.h:4934
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:5152
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1209
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:5048
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2507
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:190
CastInfo helper for casting from VPRecipeBase to a mixin class that is not part of the VPRecipeBase c...
Definition VPlan.h:4316
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:573
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
LLVM_PACKED_END
Definition VPlan.h:1113
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2838
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
Type * toScalarizedTy(Type *Ty)
A helper for converting vectorized types to scalarized (non-vector) types.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:356
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:79
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
Definition VPlan.h:89
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isPointerTy(const Type *T)
Definition SPIRVUtils.h:377
LLVM_ABI Type * computeScalarTypeForInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands)
Compute the scalar result type for an IR Opcode given Operands.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Add
Sum of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ FAdd
Sum of floats.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2836
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:74
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
Provides a cast trait that strips const from types to make it easier to implement a const-version of ...
Definition Casting.h:388
This cast trait just provides the default implementation of doCastIfPossible to make CastInfo special...
Definition Casting.h:309
Provides a cast trait that uses a defined pointer to pointer cast as a base for reference-to-referenc...
Definition Casting.h:423
This reduction is in-loop.
Definition VPlan.h:2830
Possible variants of a reduction.
Definition VPlan.h:2828
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2833
unsigned VFScaleFactor
Definition VPlan.h:2834
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:342
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:265
Struct to hold various analyses needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2799
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2811
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2790
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:727
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:732
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:722
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:715
An overlay for VPIRInstructions wrapping PHI nodes, enabling convenient use of cast/dyn_cast/isa and exec...
Definition VPlan.h:1785
PHINode & getIRPhi()
Definition VPlan.h:1798
VPIRPhi(PHINode &PN)
Definition VPlan.h:1786
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1788
static bool classof(const VPUser *U)
Definition VPlan.h:1793
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1809
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:247
static bool classof(const VPUser *U)
Definition VPlan.h:1685
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1700
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1715
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1695
static bool classof(const VPValue *V)
Definition VPlan.h:1690
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="", Type *ResultTy=nullptr)
Definition VPlan.h:1680
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1117
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1158
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1129
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1118
static bool classof(const VPValue *V)
Definition VPlan.h:1151
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1123
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1146
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:287
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3850
VPWidenLoadEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3860
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3867
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3851
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3877
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3800
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3801
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3826
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3809
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadRecipe.
Definition VPlan.h:3820
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3953
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3969
VPWidenStoreEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3962
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3954
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3982
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3972
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3899
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3900
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3917
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3908
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreRecipe.
Definition VPlan.h:3923
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3929
static VPMixin * castFailed()
Definition VPlan.h:4334
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4325
static VPMixin * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4328