LLVM 23.0.0git
VPlanTransforms.h
Go to the documentation of this file.
1//===- VPlanTransforms.h - Utility VPlan to VPlan transforms --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides utility VPlan to VPlan transformations.
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
14#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
15
16#include "VPlan.h"
17#include "VPlanVerifier.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/ScopeExit.h"
20#include "llvm/Support/CommandLine.h"
21#include "llvm/Support/Compiler.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/Regex.h"
24
25namespace llvm {
26
28class Instruction;
29class Loop;
30class LoopVersioning;
32class PHINode;
33class ScalarEvolution;
37class VPBuilder;
38class VPRecipeBuilder;
39struct VFRange;
40
43
44#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
48#endif
49
51 /// Helper to run a VPlan pass \p Pass on \p VPlan, forwarding extra arguments
52 /// to the pass. Performs verification/printing after each VPlan pass if
53 /// requested via command line options.
54 template <bool EnableVerify = true, typename PassTy, typename... ArgsTy>
55 static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan,
56 ArgsTy &&...Args) {
57 scope_exit PostTransformActions{[&]() {
58#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
59 // Make sure to print before verification, so that output is more useful
60 // in case of failures:
62 (VPlanPrintAfterPasses.getNumOccurrences() > 0 &&
64 return Regex(Entry).match(PassName);
65 }))) {
66 dbgs()
67 << "VPlan for loop in '"
69 << "' after " << PassName << '\n';
72 else
73 dbgs() << Plan << '\n';
74 }
75#endif
76 if (VerifyEachVPlan && EnableVerify) {
77 if (!verifyVPlanIsValid(Plan))
78 report_fatal_error("Broken VPlan found, compilation aborted!");
79 }
80 }};
81
82 return std::forward<PassTy>(Pass)(Plan, std::forward<ArgsTy>(Args)...);
83 }
84#define RUN_VPLAN_PASS(PASS, ...) \
85 llvm::VPlanTransforms::runPass(#PASS, PASS, __VA_ARGS__)
86#define RUN_VPLAN_PASS_NO_VERIFY(PASS, ...) \
87 llvm::VPlanTransforms::runPass<false>(#PASS, PASS, __VA_ARGS__)
88
89 /// Create a base VPlan0, serving as the common starting point for all later
90 /// candidates. It consists of an initial plain CFG loop with loop blocks from
91 /// \p TheLoop being directly translated to VPBasicBlocks with VPInstruction
92 /// corresponding to the input IR.
93 ///
94 /// The created loop is wrapped in an initial skeleton to facilitate
95 /// vectorization, consisting of a vector pre-header, an exit block for the
96 /// main vector loop (middle.block) and a new block as preheader of the scalar
97 /// loop (scalar.ph). See below for an illustration. It also adds a canonical
98 /// IV and its increment, using \p InductionTy and \p IVDL, and creates a
99 /// VPValue expression for the original trip count.
100 ///
101 /// [ ] <-- Plan's entry VPIRBasicBlock, wrapping the original loop's
102 /// / \ old preheader. Will contain iteration number check and SCEV
103 /// | | expansions.
104 /// | |
105 /// / v
106 /// | [ ] <-- vector loop bypass (may consist of multiple blocks) will be
107 /// | / | added later.
108 /// | / v
109 /// || [ ] <-- vector pre header.
110 /// |/ |
111 /// | v
112 /// | [ ] \ <-- plain CFG loop wrapping original loop to be vectorized.
113 /// | [ ]_|
114 /// | |
115 /// | v
116 /// | [ ] <--- middle-block with the branch to successors
117 /// | / |
118 /// | / |
119 /// | | v
120  /// \--->[ ] <--- scalar preheader (initially a VPBasicBlock, which will be
121  ///    |   |      replaced later by a VPIRBasicBlock wrapping the scalar
122  ///    |   |      preheader basic block).
123 /// | |
124 /// v <-- edge from middle to exit iff epilogue is not required.
125 /// | [ ] \
126 /// | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue,
127 /// | | header wrapped in VPIRBasicBlock).
128 /// \ |
129 /// \ v
130 /// >[ ] <-- original loop exit block(s), wrapped in VPIRBasicBlocks.
131 LLVM_ABI_FOR_TEST static std::unique_ptr<VPlan>
132 buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL,
133 PredicatedScalarEvolution &PSE, LoopVersioning *LVer = nullptr);
134
135 /// Replace VPPhi recipes in \p Plan's header with corresponding
136 /// VPHeaderPHIRecipe subclasses for inductions, reductions, and
137 /// fixed-order recurrences. This processes all header phis and creates
138 /// the appropriate widened recipe for each one. For fixed-order
139 /// recurrences, also creates FirstOrderRecurrenceSplice instructions and
140 /// sinks/hoists users as needed. Returns false if any fixed-order
141 /// recurrence cannot be handled.
142 static bool createHeaderPhiRecipes(
143 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
144 const MapVector<PHINode *, InductionDescriptor> &Inductions,
145 const MapVector<PHINode *, RecurrenceDescriptor> &Reductions,
146 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
147 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering);
148
149 /// Create VPReductionRecipes for in-loop reductions. This processes chains
150 /// of operations contributing to in-loop reductions and creates appropriate
151 /// VPReductionRecipe instances.
152 static void createInLoopReductionRecipes(VPlan &Plan, ElementCount MinVF);
153
154 /// Update \p Plan to account for all early exits. If \p Style is not
155 /// NoUncountableExit, handles uncountable early exits and checks that all
156 /// loads are dereferenceable. Returns false if a non-dereferenceable load is
157 /// found.
158 LLVM_ABI_FOR_TEST static bool
159 handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop,
160 PredicatedScalarEvolution &PSE, DominatorTree &DT,
161 AssumptionCache *AC);
162
163 /// If a check is needed to guard executing the scalar epilogue loop, it will
164 /// be added to the middle block.
165 LLVM_ABI_FOR_TEST static void addMiddleCheck(VPlan &Plan, bool TailFolded);
166
167 // Create a check to \p Plan to see if the vector loop should be executed.
168 // If \p CheckBlock is non-null, the compare and branch are placed there;
169 // ExpandSCEV recipes are always placed in Entry.
170 static void addMinimumIterationCheck(
171 VPlan &Plan, ElementCount VF, unsigned UF,
172 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
173 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
174 DebugLoc DL, PredicatedScalarEvolution &PSE,
175 VPBasicBlock *CheckBlock = nullptr);
176
177 /// Add a new check block before the vector preheader to \p Plan to check if
178 /// the main vector loop should be executed (TC >= VF * UF).
179 static void
180 addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF,
181 bool RequiresScalarEpilogue, Loop *OrigLoop,
183 DebugLoc DL, PredicatedScalarEvolution &PSE);
184
185 /// Add a check to \p Plan to see if the epilogue vector loop should be
186 /// executed.
188 VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue,
189 ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep,
190 unsigned EpilogueLoopStep, ScalarEvolution &SE);
191
192 /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
193 /// flat CFG into a hierarchical CFG.
194 LLVM_ABI_FOR_TEST static void createLoopRegions(VPlan &Plan);
195
196 /// Wrap runtime check block \p CheckBlock in a VPIRBB and \p Cond in a
197 /// VPValue and connect the block to \p Plan, using the VPValue as branch
198 /// condition.
199 static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock,
200 bool AddBranchWeights);
201
202 /// Replaces the VPInstructions in \p Plan with corresponding
203 /// widen recipes. Returns false if any VPInstructions could not be converted
204 /// to a wide recipe if needed.
205 LLVM_ABI_FOR_TEST static bool
207 const TargetLibraryInfo &TLI);
208
209 /// Try to legalize reductions with multiple in-loop uses. Currently only
210 /// strict and non-strict min/max reductions used by FindLastIV reductions are
211 /// supported, corresponding to computing the first and last argmin/argmax,
212 /// respectively. Otherwise return false.
213 static bool handleMultiUseReductions(VPlan &Plan,
214 OptimizationRemarkEmitter *ORE,
215 Loop *TheLoop);
216
217 /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If they do,
218 /// try to update the vector loop to exit early if any input is NaN and resume
219 /// executing in the scalar loop to handle the NaNs there. Return false if
220 /// this attempt was unsuccessful.
221 static bool handleMaxMinNumReductions(VPlan &Plan);
222
223 /// Check if \p Plan contains any FindLast reductions. If it does, try to
224 /// update the vector loop to save the appropriate state using selects
225 /// for entire vectors for both the latest mask containing at least one active
226 /// element and the corresponding data vector. Return false if this attempt
227 /// was unsuccessful.
228 static bool handleFindLastReductions(VPlan &Plan);
229
230 /// Clear NSW/NUW flags from reduction instructions if necessary.
231 static void clearReductionWrapFlags(VPlan &Plan);
232
233 /// Explicitly unroll \p Plan by \p UF.
234 static void unrollByUF(VPlan &Plan, unsigned UF);
235
236 /// Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and
237 /// VPInstruction in \p Plan with \p VF single-scalar recipes. Replicate
238 /// regions are dissolved by replicating their blocks and their recipes \p VF
239 /// times.
240 /// TODO: Also dissolve replicate regions with live outs.
241 static void replicateByVF(VPlan &Plan, ElementCount VF);
242
243 /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
244 /// resulting plan to \p BestVF and \p BestUF.
245 static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
246 unsigned BestUF,
247 PredicatedScalarEvolution &PSE);
248
249 /// Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL
250 /// is known to be <= VF, replacing them with the AVL directly.
251 static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
252 PredicatedScalarEvolution &PSE);
253
254 /// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
255 /// optimizations, dead recipe removal, replicate region optimizations and
256 /// block merging.
257 LLVM_ABI_FOR_TEST static void optimize(VPlan &Plan);
258
259 /// Remove redundant VPBasicBlocks by merging them into their single
260 /// predecessor if the latter has a single successor.
261 static bool mergeBlocksIntoPredecessors(VPlan &Plan);
262
263 /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
264 /// region block and remove the mask operand. Optimize the created regions by
265 /// iteratively sinking scalar operands into the region, followed by merging
266 /// regions until no improvements are remaining.
267 static void createAndOptimizeReplicateRegions(VPlan &Plan);
268
269 /// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an
270 /// (active-lane-mask recipe, wide canonical IV, trip-count). If \p
271 /// UseActiveLaneMaskForControlFlow is true, introduce an
272 /// VPActiveLaneMaskPHIRecipe.
273 static void addActiveLaneMask(VPlan &Plan,
274 bool UseActiveLaneMaskForControlFlow);
275
276 /// Insert truncates and extends for any truncated recipe. Redundant casts
277 /// will be folded later.
278 static void
279 truncateToMinimalBitwidths(VPlan &Plan,
280 const MapVector<Instruction *, uint64_t> &MinBWs);
281
282 /// Replace symbolic strides from \p StridesMap in \p Plan with constants when
283 /// possible.
284 static void
285 replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE,
286 const DenseMap<Value *, const SCEV *> &StridesMap);
287
288 /// Drop poison flags from recipes that may generate a poison value that is
289 /// used after vectorization, even when their operands are not poison. Those
290 /// recipes meet the following conditions:
291 /// * Contribute to the address computation of a recipe generating a widen
292 /// memory load/store (VPWidenMemoryInstructionRecipe or
293 /// VPInterleaveRecipe).
294 /// * Such a widen memory load/store is masked, but not with the header mask.
295 static void dropPoisonGeneratingRecipes(VPlan &Plan);
296
297 /// Add a VPCurrentIterationPHIRecipe and related recipes to \p Plan and
298 /// replaces all uses of the canonical IV except for the canonical IV
299 /// increment with a VPCurrentIterationPHIRecipe. The canonical IV is only
300 /// used to control the loop after this transformation.
301 static void
302 addExplicitVectorLength(VPlan &Plan,
303 const std::optional<unsigned> &MaxEVLSafeElements);
304
305 /// Optimize recipes which use an EVL-based header mask to VP intrinsics, for
306 /// example:
307 ///
308 /// %mask = icmp ult step-vector, EVL
309 /// %load = load %ptr, %mask
310 /// -->
311 /// %load = vp.load %ptr, EVL
312 static void optimizeEVLMasks(VPlan &Plan);
313
314 // For each Interleave Group in \p InterleaveGroups replace the Recipes
315 // widening its memory instructions with a single VPInterleaveRecipe at its
316 // insertion point.
317 static void createInterleaveGroups(
318 VPlan &Plan,
319 const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
320 &InterleaveGroups,
321 const bool &EpilogueAllowed);
322
323 /// Remove dead recipes from \p Plan.
324 static void removeDeadRecipes(VPlan &Plan);
325
326 /// Update \p Plan to account for uncountable early exits by introducing
327 /// appropriate branching logic in the latch that handles early exits and the
328 /// latch exit condition. Multiple exits are handled with a dispatch block
329 /// that determines which exit to take based on lane-by-lane semantics.
330 static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB,
331 VPBasicBlock *LatchVPBB,
332 VPBasicBlock *MiddleVPBB,
334
335 /// Replaces the exit condition from
336 /// (branch-on-cond eq CanonicalIVInc, VectorTripCount)
337 /// to
338 /// (branch-on-cond eq AVLNext, 0)
339 static void convertEVLExitCond(VPlan &Plan);
340
341 /// Replace loop regions with explicit CFG.
342 static void dissolveLoopRegions(VPlan &Plan);
343
344 /// Expand BranchOnTwoConds instructions into explicit CFG with
345 /// BranchOnCond instructions. Should be called after dissolveLoopRegions.
346 static void expandBranchOnTwoConds(VPlan &Plan);
347
348 /// Transform loops with variable-length stepping after region
349 /// dissolution.
350 ///
351 /// Once loop regions are replaced with explicit CFG, loops can step with
352 /// variable vector lengths instead of fixed lengths. This transformation:
353 /// * Makes CurrentIteration-Phi concrete.
354  /// * Removes CanonicalIV and increment.
355 static void convertToVariableLengthStep(VPlan &Plan);
356
357 /// Lower abstract recipes to concrete ones, that can be codegen'd.
358 static void convertToConcreteRecipes(VPlan &Plan);
359
360 /// This function converts initial recipes to the abstract recipes and clamps
361 /// \p Range based on cost model for following optimizations and cost
362 /// estimations. The converted abstract recipes will lower to concrete
363 /// recipes before codegen.
364 static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
365 VFRange &Range);
366
367 /// Perform instcombine-like simplifications on recipes in \p Plan.
368 static void simplifyRecipes(VPlan &Plan);
369
370 /// Remove BranchOnCond recipes with true or false conditions together with
371 /// removing dead edges to their successors. If \p OnlyLatches is true, only
372 /// process loop latches.
373 static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches = false);
374
375 /// Perform common-subexpression-elimination on \p Plan.
376 static void cse(VPlan &Plan);
377
378 /// If there's a single exit block, optimize its phi recipes that use exiting
379 /// IV values by feeding them precomputed end values instead, possibly taken
380 /// one step backwards.
381 static void optimizeInductionLiveOutUsers(VPlan &Plan,
382 PredicatedScalarEvolution &PSE,
383 bool FoldTail);
384
385 /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
386 static void materializeBroadcasts(VPlan &Plan);
387
388 /// Hoist predicated loads from the same address to the loop entry block, if
389 /// they are guaranteed to execute on both paths (i.e., in replicate regions
390 /// with complementary masks P and NOT P).
391 static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE,
392 const Loop *L);
393
394 /// Sink predicated stores to the same address with complementary predicates
395 /// (P and NOT P) to an unconditional store with select recipes for the
396 /// stored values. This eliminates branching overhead when all paths
397 /// unconditionally store to the same location.
398 static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE,
399 const Loop *L);
400
401 // Materialize vector trip counts for constants early if it can simply be
402 // computed as (Original TC / VF * UF) * VF * UF.
403 static void
404 materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF,
405 unsigned BestUF,
406 PredicatedScalarEvolution &PSE);
407
408 /// Materialize vector trip count computations to a set of VPInstructions.
409 /// \p Step is used as the step value for the trip count computation.
410 /// \p MaxRuntimeStep is the maximum possible runtime value of Step, used to
411 /// prove the trip count is divisible by the step for scalable VFs.
412 static void materializeVectorTripCount(
413 VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking,
414 bool RequiresScalarEpilogue, VPValue *Step,
415 std::optional<uint64_t> MaxRuntimeStep = std::nullopt);
416
417 /// Materialize the backedge-taken count to be computed explicitly using
418 /// VPInstructions.
419 static void materializeBackedgeTakenCount(VPlan &Plan,
420 VPBasicBlock *VectorPH);
421
422 /// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
423 /// into vectors and Unpack recipes to extract scalars from vectors as
424 /// needed.
425 static void materializePacksAndUnpacks(VPlan &Plan);
426
427 /// Materialize UF, VF and VFxUF to be computed explicitly using
428 /// VPInstructions.
429 static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH,
430 ElementCount VF);
431
432 /// Expand VPExpandSCEVRecipes in \p Plan's entry block. Each
433 /// VPExpandSCEVRecipe is replaced with a live-in wrapping the expanded IR
434 /// value. A mapping from SCEV expressions to their expanded IR value is
435 /// returned.
436 static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
437 ScalarEvolution &SE);
438
439 /// Try to find a single VF among \p Plan's VFs for which all interleave
440 /// groups (with known minimum VF elements) can be replaced by wide loads and
441 /// stores processing VF elements, if all transformed interleave groups access
442 /// the full vector width (checked via the maximum vector register width). If
443 /// the transformation can be applied, the original \p Plan will be split in
444 /// 2:
445 /// 1. The original Plan with the single VF containing the optimized recipes
446 /// using wide loads instead of interleave groups.
447 /// 2. A new clone which contains all VFs of Plan except the optimized VF.
448 ///
449 /// This effectively is a very simple form of loop-aware SLP, where we use
450 /// interleave groups to identify candidates.
451 static std::unique_ptr<VPlan>
452 narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI);
453
454 /// Adapts the vector loop region for tail folding by introducing a header
455 /// mask and conditionally executing the content of the region:
456 ///
457 /// Vector loop region before:
458 /// +-------------------------------------------+
459 /// |%iv = ... |
460 /// |... |
461 /// |%iv.next = add %iv, vfxuf |
462 /// |branch-on-count %iv.next, vector-trip-count|
463 /// +-------------------------------------------+
464 ///
465 /// Vector loop region after:
466 /// +-------------------------------------------+
467 /// |%iv = ... |
468 /// |%wide.iv = widen-canonical-iv ... |
469 /// |%header-mask = icmp ule %wide.iv, BTC |
470 /// |branch-on-cond %header-mask |---+
471 /// +-------------------------------------------+ |
472 /// | |
473 /// v |
474 /// +-------------------------------------------+ |
475 /// | ... | |
476 /// +-------------------------------------------+ |
477 /// | |
478 /// v |
479 /// +-------------------------------------------+ |
480 /// |<phis> = phi [..., ...], [poison, header] |
481 /// |%iv.next = add %iv, vfxuf |<--+
482 /// |branch-on-count %iv.next, vector-trip-count|
483 /// +-------------------------------------------+
484 ///
485 /// Any VPInstruction::ExtractLastLanes are also updated to extract from the
486 /// last active lane of the header mask.
487 static void foldTailByMasking(VPlan &Plan);
488
489 /// Predicate and linearize the control-flow in the only loop region of
490 /// \p Plan.
491 static void introduceMasksAndLinearize(VPlan &Plan);
492
493 /// Replace a VPWidenCanonicalIVRecipe if it is present in \p Plan, with a
494 /// VPWidenIntOrFpInductionRecipe, provided it would not cause additional
495 /// spills for \p VF at unroll factor \p UF.
497 VPlan &Plan, ScalarEvolution &SE, const TargetTransformInfo &TTI,
499 unsigned UF, const SmallPtrSetImpl<const Value *> &ValuesToIgnore);
500
501 /// Add branch weight metadata, if the \p Plan's middle block is terminated by
502 /// a BranchOnCond recipe.
503 static void
504 addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
505 std::optional<unsigned> VScaleForTuning);
506
507 /// Adjust first-order recurrence users in the middle block: create
508 /// penultimate element extracts for LCSSA phi users, and handle penultimate
509 /// extracts of the last active lane edge.
510 static void adjustFirstOrderRecurrenceMiddleUsers(VPlan &Plan,
511 VFRange &Range);
512
513 /// Optimize FindLast reductions selecting IVs (or expressions of IVs) by
514 /// converting them to FindIV reductions, if their IV range excludes a
515 /// suitable sentinel value. For expressions of IVs, the expression is sunk
516 /// to the middle block.
517 static void optimizeFindIVReductions(VPlan &Plan,
518 PredicatedScalarEvolution &PSE, Loop &L);
519
520 /// Detect and create partial reduction recipes for scaled reductions in
521 /// \p Plan. Must be called after recipe construction. If partial reductions
522 /// are only valid for a subset of VFs in Range, Range.End is updated.
523 static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx,
524 VFRange &Range);
525
526 /// Convert load/store VPInstructions in \p Plan into widened or replicate
527 /// recipes. Non load/store input instructions are left unchanged.
528 static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range,
529 VPRecipeBuilder &RecipeBuilder);
530
531 /// Make VPlan-based scalarization decision prior to delegating to the ones
532  /// made by the legacy CM. Only transforms "usesFirstLaneOnly" def-use chains
533 /// enabled by prior widening of consecutive memory operations for now.
534 static void makeScalarizationDecisions(VPlan &Plan, VFRange &Range);
535
536 /// Convert call VPInstructions in \p Plan into widened call, vector
537 /// intrinsic or replicate recipes based on a cost comparison via \p CostCtx.
538 static void makeCallWideningDecisions(VPlan &Plan, VFRange &Range,
539 VPRecipeBuilder &RecipeBuilder,
540 VPCostContext &CostCtx);
541};
542
543} // namespace llvm
544
545#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static constexpr uint32_t MinItersBypassWeights[]
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This pass exposes codegen information to IR-level passes.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const char PassName[]
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
A struct for saving information about induction variables.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
The optimization diagnostic interface.
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
The main scalar evolution driver.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
VPlan-based builder utility analogous to IRBuilder.
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4325
Helper class to create VPRecipies from IR instructions.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:801
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4506
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1065
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4651
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI_FOR_TEST cl::opt< bool > VerifyEachVPlan
LLVM_ABI_FOR_TEST cl::opt< bool > VPlanPrintAfterAll
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:78
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI_FOR_TEST cl::list< std::string > VPlanPrintAfterPasses
TargetTransformInfo TTI
LLVM_ABI_FOR_TEST bool verifyVPlanIsValid(const VPlan &Plan)
Verify invariants for general VPlans.
LLVM_ABI_FOR_TEST cl::opt< bool > VPlanPrintVectorRegionScope
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VPBasicBlock *MiddleVPBB, UncountableExitStyle Style)
Update Plan to account for uncountable early exits by introducing appropriate branching logic in the ...
static LLVM_ABI_FOR_TEST bool tryToConvertVPInstructionsToVPRecipes(VPlan &Plan, const TargetLibraryInfo &TLI)
Replaces the VPInstructions in Plan with corresponding widen recipes.
static void makeMemOpWideningDecisions(VPlan &Plan, VFRange &Range, VPRecipeBuilder &RecipeBuilder)
Convert load/store VPInstructions in Plan into widened or replicate recipes.
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock=nullptr)
static decltype(auto) runPass(StringRef PassName, PassTy &&Pass, VPlan &Plan, ArgsTy &&...Args)
Helper to run a VPlan pass Pass on VPlan, forwarding extra arguments to the pass.
static bool createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static void materializeBroadcasts(VPlan &Plan)
Add explicit broadcasts for live-ins and VPValues defined in Plan's entry block if they are used as v...
static void materializePacksAndUnpacks(VPlan &Plan)
Add explicit Build[Struct]Vector recipes to Pack multiple scalar values into vectors and Unpack recip...
static void createInterleaveGroups(VPlan &Plan, const SmallPtrSetImpl< const InterleaveGroup< Instruction > * > &InterleaveGroups, const bool &EpilogueAllowed)
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF, PredicatedScalarEvolution &PSE)
Try to simplify VPInstruction::ExplicitVectorLength recipes when the AVL is known to be <= VF,...
static void removeBranchOnConst(VPlan &Plan, bool OnlyLatches=false)
Remove BranchOnCond recipes with true or false conditions together with removing dead edges to their ...
static void introduceMasksAndLinearize(VPlan &Plan)
Predicate and linearize the control-flow in the only loop region of Plan.
static void materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH, ElementCount VF)
Materialize UF, VF and VFxUF to be computed explicitly using VPInstructions.
static void foldTailByMasking(VPlan &Plan)
Adapts the vector loop region for tail folding by introducing a header mask and conditionally executi...
static void materializeBackedgeTakenCount(VPlan &Plan, VPBasicBlock *VectorPH)
Materialize the backedge-taken count to be computed explicitly using VPInstructions.
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static void addActiveLaneMask(VPlan &Plan, bool UseActiveLaneMaskForControlFlow)
Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an (active-lane-mask recipe,...
static bool handleMultiUseReductions(VPlan &Plan, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Try to legalize reductions with multiple in-loop uses.
static void replaceWideCanonicalIVWithWideIV(VPlan &Plan, ScalarEvolution &SE, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, ElementCount VF, unsigned UF, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Replace a VPWidenCanonicalIVRecipe if it is present in Plan, with a VPWidenIntOrFpInductionRecipe,...
static void createAndOptimizeReplicateRegions(VPlan &Plan)
Wrap predicated VPReplicateRecipes with a mask operand in an if-then region block and remove the mask...
static void convertToVariableLengthStep(VPlan &Plan)
Transform loops with variable-length stepping after region dissolution.
static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, std::optional< unsigned > VScaleForTuning)
Add branch weight metadata, if the Plan's middle block is terminated by a BranchOnCond recipe.
static std::unique_ptr< VPlan > narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI)
Try to find a single VF among Plan's VFs for which all interleave groups (with known minimum VF eleme...
static bool handleFindLastReductions(VPlan &Plan)
Check if Plan contains any FindLast reductions.
static void createInLoopReductionRecipes(VPlan &Plan, ElementCount MinVF)
Create VPReductionRecipes for in-loop reductions.
static void unrollByUF(VPlan &Plan, unsigned UF)
Explicitly unroll Plan by UF.
static DenseMap< const SCEV *, Value * > expandSCEVs(VPlan &Plan, ScalarEvolution &SE)
Expand VPExpandSCEVRecipes in Plan's entry block.
static void convertToConcreteRecipes(VPlan &Plan)
Lower abstract recipes to concrete ones, that can be codegen'd.
static void expandBranchOnTwoConds(VPlan &Plan)
Expand BranchOnTwoConds instructions into explicit CFG with BranchOnCond instructions.
static void materializeVectorTripCount(VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking, bool RequiresScalarEpilogue, VPValue *Step, std::optional< uint64_t > MaxRuntimeStep=std::nullopt)
Materialize vector trip count computations to a set of VPInstructions.
static void hoistPredicatedLoads(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Hoist predicated loads from the same address to the loop entry block, if they are guaranteed to execute.
static bool mergeBlocksIntoPredecessors(VPlan &Plan)
Remove redundant VPBasicBlocks by merging them into their single predecessor if the latter has a single successor.
static void optimizeFindIVReductions(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &L)
Optimize FindLast reductions selecting IVs (or expressions of IVs) by converting them to FindIV reduc...
static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, VFRange &Range)
This function converts initial recipes to the abstract recipes and clamps Range based on cost model f...
static void materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
static void makeScalarizationDecisions(VPlan &Plan, VFRange &Range)
Make VPlan-based scalarization decision prior to delegating to the ones made by the legacy CM.
static void addExplicitVectorLength(VPlan &Plan, const std::optional< unsigned > &MaxEVLSafeElements)
Add a VPCurrentIterationPHIRecipe and related recipes to Plan and replaces all uses of the canonical ...
static void makeCallWideningDecisions(VPlan &Plan, VFRange &Range, VPRecipeBuilder &RecipeBuilder, VPCostContext &CostCtx)
Convert call VPInstructions in Plan into widened call, vector intrinsic or replicate recipes based on...
static void adjustFirstOrderRecurrenceMiddleUsers(VPlan &Plan, VFRange &Range)
Adjust first-order recurrence users in the middle block: create penultimate element extracts for LCSS...
static void optimizeEVLMasks(VPlan &Plan)
Optimize recipes which use an EVL-based header mask to VP intrinsics, for example:
static LLVM_ABI_FOR_TEST bool handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Update Plan to account for all early exits.
static void replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &StridesMap)
Replace symbolic strides from StridesMap in Plan with constants when possible.
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void simplifyRecipes(VPlan &Plan)
Perform instcombine-like simplifications on recipes in Plan.
static void sinkPredicatedStores(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L)
Sink predicated stores to the same address with complementary predicates (P and NOT P) to an unconditional store.
static void replicateByVF(VPlan &Plan, ElementCount VF)
Replace replicating VPReplicateRecipe, VPScalarIVStepsRecipe and VPInstruction in Plan with VF single...
static void addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
Add a new check block before the vector preheader to Plan to check if the main vector loop should be executed.
static void clearReductionWrapFlags(VPlan &Plan)
Clear NSW/NUW flags from reduction instructions if necessary.
static void optimizeInductionLiveOutUsers(VPlan &Plan, PredicatedScalarEvolution &PSE, bool FoldTail)
If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them p...
static void createPartialReductions(VPlan &Plan, VPCostContext &CostCtx, VFRange &Range)
Detect and create partial reduction recipes for scaled reductions in Plan.
static void cse(VPlan &Plan)
Perform common-subexpression-elimination on Plan.
static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...
static void dissolveLoopRegions(VPlan &Plan)
Replace loop regions with explicit CFG.
static void truncateToMinimalBitwidths(VPlan &Plan, const MapVector< Instruction *, uint64_t > &MinBWs)
Insert truncates and extends for any truncated recipe.
static void dropPoisonGeneratingRecipes(VPlan &Plan)
Drop poison flags from recipes that may generate a poison value that is used after vectorization,...
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
Optimize Plan based on BestVF and BestUF.
static void convertEVLExitCond(VPlan &Plan)
Replaces the exit condition from (branch-on-cond eq CanonicalIVInc, VectorTripCount) to (branch-on-co...
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...