LLVM 23.0.0git
LoopVectorizationPlanner.cpp
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.cpp - VF selection and planning -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements VFSelectionContext methods for loop vectorization
11/// VF selection, independent of cost-modeling decisions.
12///
13//===----------------------------------------------------------------------===//
14
21#include "llvm/Support/Debug.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "loop-vectorize"
29
31
33 "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
34 cl::desc("Maximize bandwidth when selecting vectorization factor which "
35 "will be determined by the smallest type in loop."));
36
38 "vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true),
40 cl::desc("Try wider VFs if they enable the use of vector variants"));
41
43 "vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
44 cl::desc("Discard VFs if their register pressure is too high."));
45
47 "force-target-supports-scalable-vectors", cl::init(false), cl::Hidden,
49 "Pretend that scalable vectors are supported, even if the target does "
50 "not support them. This flag should only be used for testing."));
51
53 "prefer-inloop-reductions", cl::init(false), cl::Hidden,
54 cl::desc("Prefer in-loop vector reductions, "
55 "overriding the targets preference."));
56
57/// Note: This currently only applies to `llvm.masked.load` and
58/// `llvm.masked.store`. TODO: Extend this to cover other operations as needed.
60 "force-target-supports-masked-memory-ops", cl::init(false), cl::Hidden,
61 cl::desc("Assume the target supports masked memory operations (used for "
62 "testing)."));
63
65 "force-target-supports-gather-scatter-ops", cl::init(false), cl::Hidden,
66 cl::desc("Assume the target supports gather/scatter operations (used for "
67 "testing)."));
68
// Returns true if the target can represent I (a load or store) as a masked
// load/store for I's data type, alignment and address space.
// NOTE(review): the opening signature line (isLegalMaskedLoadOrStore per the
// member list in this dump) was dropped by the extraction — confirm upstream.
70 ElementCount VF) const {
72 auto *Ty = getLoadStoreType(I);
73 const unsigned AS = getLoadStoreAddressSpace(I);
74 const Align Alignment = getLoadStoreAlignment(I);
75
// Dispatch to the matching TTI legality hook by instruction kind.
// NOTE(review): line 76 (start of the return expression, presumably OR'ing
// in ForceTargetSupportsMaskedMemoryOps) is missing — TODO confirm.
77 (isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty, Alignment, AS)
78 : TTI.isLegalMaskedStore(Ty, Alignment, AS));
79}
80
// Returns true if the target can represent V as a masked gather (for loads)
// or masked scatter (for stores) at the given VF.
// NOTE(review): the opening signature line (isLegalGatherOrScatter per the
// member list in this dump) was dropped by the extraction — confirm upstream.
82 ElementCount VF) const {
83 bool LI = isa<LoadInst>(V);
84 bool SI = isa<StoreInst>(V);
// Only loads and stores can be gathers/scatters.
85 if (!LI && !SI)
86 return false;
87 auto *Ty = getLoadStoreType(V);
// Query legality on the vector type when VF is a real vector.
// NOTE(review): line 88 (presumably computing the access alignment) and line
// 91 (start of the return expression) are missing — TODO confirm.
89 if (VF.isVector())
90 Ty = VectorType::get(Ty, VF);
92 (LI && TTI.isLegalMaskedGather(Ty, Align)) ||
93 (SI && TTI.isLegalMaskedScatter(Ty, Align));
94}
95
// True when scalable vectors may be used: either the target reports support
// or the testing flag -force-target-supports-scalable-vectors is set.
// NOTE(review): the signature line was dropped by the extraction.
97 return TTI.supportsScalableVectors() || ForceTargetSupportsScalableVectors;
98}
99
100bool VFSelectionContext::useMaxBandwidth(bool IsScalable) const {
// Max-bandwidth VF selection is used when -vectorizer-maximize-bandwidth is
// set, or, when the flag was not given on the command line, when TTI asks
// for it or the loop has vector call variants.
// NOTE(review): lines 101-103 (presumably deriving RegKind from IsScalable)
// and line 106 (presumably the UseWiderVFIfCallVariantsPresent conjunct) are
// missing from this extraction — confirm against upstream.
104 return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
105 (TTI.shouldMaximizeVectorBandwidth(RegKind) ||
107 Legal->hasVectorCallVariants())));
108}
109
// Decide whether register pressure should gate the given VF.
// NOTE(review): the signature line (shouldConsiderRegPressureForVF per the
// member list in this dump) and line 122 (start of the final return
// expression comparing VF against the max permissible VF) are missing.
// An explicit -vectorizer-consider-reg-pressure setting always wins.
111 if (ConsiderRegPressure.getNumOccurrences())
112 return ConsiderRegPressure;
113
114 // TODO: We should eventually consider register pressure for all targets. The
115 // TTI hook is temporary whilst target-specific issues are being fixed.
116 if (TTI.shouldConsiderVectorizationRegPressure())
117 return true;
118
// Without max-bandwidth, no extra pressure check is needed.
119 if (!useMaxBandwidth(VF.isScalable()))
120 return false;
121 // Only calculate register pressure for VFs enabled by MaxBandwidth.
123 VF, VF.isScalable() ? MaxPermissibleVFWithoutMaxBW.ScalableVF
124 : MaxPermissibleVFWithoutMaxBW.FixedVF);
125}
126
127ElementCount VFSelectionContext::clampVFByMaxTripCount(
128 ElementCount VF, unsigned MaxTripCount, unsigned UserIC,
129 bool FoldTailByMasking, bool RequiresScalarEpilogue) const {
130 unsigned EstimatedVF = VF.getKnownMinValue();
131 if (VF.isScalable() && F.hasFnAttribute(Attribute::VScaleRange)) {
132 auto Attr = F.getFnAttribute(Attribute::VScaleRange);
133 auto Min = Attr.getVScaleRangeMin();
134 EstimatedVF *= Min;
135 }
136
137 // When a scalar epilogue is required, at least one iteration of the scalar
138 // loop has to execute. Adjust MaxTripCount accordingly to avoid picking a
139 // max VF that results in a dead vector loop.
140 if (MaxTripCount > 0 && RequiresScalarEpilogue)
141 MaxTripCount -= 1;
142
143 // When the user specifies an interleave count, we need to ensure that
144 // VF * UserIC <= MaxTripCount to avoid a dead vector loop.
145 unsigned IC = UserIC > 0 ? UserIC : 1;
146 unsigned EstimatedVFTimesIC = EstimatedVF * IC;
147
148 if (MaxTripCount && MaxTripCount <= EstimatedVFTimesIC &&
149 (!FoldTailByMasking || isPowerOf2_32(MaxTripCount))) {
150 // If upper bound loop trip count (TC) is known at compile time there is no
151 // point in choosing VF greater than TC / IC (as done in the loop below).
152 // Select maximum power of two which doesn't exceed TC / IC. If VF is
153 // scalable, we only fall back on a fixed VF when the TC is less than or
154 // equal to the known number of lanes.
155 auto ClampedUpperTripCount = llvm::bit_floor(MaxTripCount / IC);
156 if (ClampedUpperTripCount == 0)
157 ClampedUpperTripCount = 1;
158 LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to maximum power of two not "
159 "exceeding the constant trip count"
160 << (UserIC > 0 ? " divided by UserIC" : "") << ": "
161 << ClampedUpperTripCount << "\n");
162 return ElementCount::get(ClampedUpperTripCount,
163 FoldTailByMasking ? VF.isScalable() : false);
164 }
165 return VF;
166}
167
// Compute the largest VF the target can handle for the given safe VF bound,
// optionally widening further under max-bandwidth, and clamping by the known
// trip count. NOTE(review): line 175 (presumably the RGK_FixedWidthVector arm
// of the conditional) and line 181 (the lambda's return statement) are
// missing from this extraction — confirm against upstream.
168ElementCount VFSelectionContext::getMaximizedVFForTarget(
169 unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
170 ElementCount MaxSafeVF, unsigned UserIC, bool FoldTailByMasking,
171 bool RequiresScalarEpilogue) {
// Whether we are sizing a scalable or fixed-width register class follows
// from the scalability of the safe bound.
172 bool ComputeScalableMaxVF = MaxSafeVF.isScalable();
173 const TypeSize WidestRegister = TTI.getRegisterBitWidth(
174 ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
176
177 // Convenience function to return the minimum of two ElementCounts.
178 auto MinVF = [](const ElementCount &LHS, const ElementCount &RHS) {
179 assert((LHS.isScalable() == RHS.isScalable()) &&
180 "Scalable flags must match");
182 };
183
184 // Ensure MaxVF is a power of 2; the dependence distance bound may not be.
185 // Note that both WidestRegister and WidestType may not be a powers of 2.
186 auto MaxVectorElementCount = ElementCount::get(
187 llvm::bit_floor(WidestRegister.getKnownMinValue() / WidestType),
188 ComputeScalableMaxVF);
189 MaxVectorElementCount = MinVF(MaxVectorElementCount, MaxSafeVF);
190 LLVM_DEBUG(dbgs() << "LV: The Widest register safe to use is: "
191 << (MaxVectorElementCount * WidestType) << " bits.\n");
192
// Register too narrow for even one widest-type element: scalar fallback.
193 if (!MaxVectorElementCount) {
194 LLVM_DEBUG(dbgs() << "LV: The target has no "
195 << (ComputeScalableMaxVF ? "scalable" : "fixed")
196 << " vector registers.\n");
197 return ElementCount::getFixed(1);
198 }
199
200 ElementCount MaxVF =
201 clampVFByMaxTripCount(MaxVectorElementCount, MaxTripCount, UserIC,
202 FoldTailByMasking, RequiresScalarEpilogue);
203 // If the MaxVF was already clamped, there's no point in trying to pick a
204 // larger one.
205 if (MaxVF != MaxVectorElementCount)
206 return MaxVF;
207
// Record the non-max-bandwidth ceiling so later register-pressure checks
// can tell which VFs only exist because of max-bandwidth widening.
208 if (MaxVF.isScalable())
209 MaxPermissibleVFWithoutMaxBW.ScalableVF = MaxVF;
210 else
211 MaxPermissibleVFWithoutMaxBW.FixedVF = MaxVF;
212
// Under max-bandwidth, re-derive the VF from the smallest element type,
// respect the target's minimum VF, and re-clamp by trip count.
213 if (useMaxBandwidth(ComputeScalableMaxVF)) {
214 auto MaxVectorElementCountMaxBW = ElementCount::get(
215 llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
216 ComputeScalableMaxVF);
217 MaxVF = MinVF(MaxVectorElementCountMaxBW, MaxSafeVF);
218
219 if (ElementCount MinVF =
220 TTI.getMinimumVF(SmallestType, ComputeScalableMaxVF)) {
221 if (ElementCount::isKnownLT(MaxVF, MinVF)) {
222 LLVM_DEBUG(dbgs() << "LV: Overriding calculated MaxVF(" << MaxVF
223 << ") with target's minimum: " << MinVF << '\n');
224 MaxVF = MinVF;
225 }
226 }
227
228 MaxVF = clampVFByMaxTripCount(MaxVF, MaxTripCount, UserIC,
229 FoldTailByMasking, RequiresScalarEpilogue);
230 }
231 return MaxVF;
232}
233
234std::optional<unsigned> llvm::getMaxVScale(const Function &F,
235 const TargetTransformInfo &TTI) {
236 if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
237 return MaxVScale;
238
239 if (F.hasFnAttribute(Attribute::VScaleRange))
240 return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
241
242 return std::nullopt;
243}
244
// Decide (and cache in IsScalableVectorizationAllowed) whether scalable
// vectorization is feasible for this loop. NOTE(review): line 250 (the guard
// whose bare `return false;` appears below — presumably checking
// supportsScalableVectors()) and line 274 (the reportVectorizationInfo call
// opener) are missing from this extraction — confirm against upstream.
245bool VFSelectionContext::isScalableVectorizationAllowed() {
// Return the memoized answer if we already computed it.
246 if (IsScalableVectorizationAllowed)
247 return *IsScalableVectorizationAllowed;
248
// Pessimistic default; flipped to true only if all checks below pass.
249 IsScalableVectorizationAllowed = false;
251 return false;
252
253 if (Hints->isScalableVectorizationDisabled()) {
254 reportVectorizationInfo("Scalable vectorization is explicitly disabled",
255 "ScalableVectorizationDisabled", ORE, TheLoop);
256 return false;
257 }
258
259 LLVM_DEBUG(dbgs() << "LV: Scalable vectorization is available\n");
260
261 auto MaxScalableVF = ElementCount::getScalable(
262 std::numeric_limits<ElementCount::ScalarTy>::max());
263
264 // Test that the loop-vectorizer can legalize all operations for this MaxVF.
265 // FIXME: While for scalable vectors this is currently sufficient, this should
266 // be replaced by a more detailed mechanism that filters out specific VFs,
267 // instead of invalidating vectorization for a whole set of VFs based on the
268 // MaxVF.
269
270 // Disable scalable vectorization if the loop contains unsupported reductions.
271 if (!all_of(Legal->getReductionVars(), [&](const auto &Reduction) -> bool {
272 return TTI.isLegalToVectorizeReduction(Reduction.second, MaxScalableVF);
273 })) {
275 "Scalable vectorization not supported for the reduction "
276 "operations found in this loop.",
277 "ScalableVFUnfeasible", ORE, TheLoop);
278 return false;
279 }
280
281 // Disable scalable vectorization if the loop contains any instructions
282 // with element types not supported for scalable vectors.
283 if (any_of(ElementTypesInLoop, [&](Type *Ty) {
284 return !Ty->isVoidTy() && !TTI.isElementTypeLegalForScalableVector(Ty);
285 })) {
286 reportVectorizationInfo("Scalable vectorization is not supported "
287 "for all element types found in this loop.",
288 "ScalableVFUnfeasible", ORE, TheLoop);
289 return false;
290 }
291
// Safe-distance analysis needs a max vscale bound when the loop is not safe
// for arbitrary vector widths.
292 if (!Legal->isSafeForAnyVectorWidth() && !getMaxVScale(F, TTI)) {
293 reportVectorizationInfo("The target does not provide maximum vscale value "
294 "for safe distance analysis.",
295 "ScalableVFUnfeasible", ORE, TheLoop);
296 return false;
297 }
298
299 IsScalableVectorizationAllowed = true;
300 return true;
301}
302
// Compute the maximum legal scalable VF given the safe element bound.
// NOTE(review): line 303 (the return-type line, per the member list an
// ElementCount), line 306 (the value returned when scalable vectorization is
// disallowed) and line 318 (the reportVectorizationInfo call opener) are
// missing from this extraction — confirm against upstream.
304VFSelectionContext::getMaxLegalScalableVF(unsigned MaxSafeElements) {
305 if (!isScalableVectorizationAllowed())
307
// Start unbounded; keep it when any vector width is dependence-safe.
308 auto MaxScalableVF = ElementCount::getScalable(
309 std::numeric_limits<ElementCount::ScalarTy>::max());
310 if (Legal->isSafeForAnyVectorWidth())
311 return MaxScalableVF;
312
// getMaxVScale is non-null here: isScalableVectorizationAllowed() already
// rejected the !isSafeForAnyVectorWidth && !getMaxVScale case.
313 std::optional<unsigned> MaxVScale = getMaxVScale(F, TTI);
314 // Limit MaxScalableVF by the maximum safe dependence distance.
315 MaxScalableVF = ElementCount::getScalable(MaxSafeElements / *MaxVScale);
316
317 if (!MaxScalableVF)
319 "Max legal vector width too small, scalable vectorization "
320 "unfeasible.",
321 "ScalableVFUnfeasible", ORE, TheLoop);
322
323 return MaxScalableVF;
324}
325
// Compute the feasible maximum fixed and scalable VFs for the loop, honoring
// a user-specified VF when it is safe. NOTE(review): the opening signature
// line (FixedScalableVFPair computeFeasibleMaxVF(...) per the member list in
// this dump), line 388 (the guard opening the "scalable not supported"
// branch), and lines 419-420 (presumably declaring the FixedScalableVFPair
// Result) are missing from this extraction — confirm against upstream.
327 unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC,
328 bool FoldTailByMasking, bool RequiresScalarEpilogue) {
329 auto [SmallestType, WidestType] = getSmallestAndWidestTypes();
330
331 // Get the maximum safe dependence distance in bits computed by LAA.
332 // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
333 // the memory accesses that is most restrictive (involved in the smallest
334 // dependence distance).
335 unsigned MaxSafeElementsPowerOf2 =
336 llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
// Store-to-load forwarding distance can further restrict the element bound.
337 if (!Legal->isSafeForAnyStoreLoadForwardDistances()) {
338 unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits();
339 MaxSafeElementsPowerOf2 =
340 std::min(MaxSafeElementsPowerOf2, SLDist / WidestType);
341 }
342
343 auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2);
344 auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2);
345
// Remember the bound for later use when the loop is width-restricted.
346 if (!Legal->isSafeForAnyVectorWidth())
347 MaxSafeElements = MaxSafeElementsPowerOf2;
348
349 LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
350 << ".\n");
351 LLVM_DEBUG(dbgs() << "LV: The max safe scalable VF is: " << MaxSafeScalableVF
352 << ".\n");
353
354 // First analyze the UserVF, fall back if the UserVF should be ignored.
355 if (UserVF) {
356 auto MaxSafeUserVF =
357 UserVF.isScalable() ? MaxSafeScalableVF : MaxSafeFixedVF;
358
359 if (ElementCount::isKnownLE(UserVF, MaxSafeUserVF)) {
360 // If `VF=vscale x N` is safe, then so is `VF=N`
361 if (UserVF.isScalable())
362 return FixedScalableVFPair(
363 ElementCount::getFixed(UserVF.getKnownMinValue()), UserVF);
364
365 return UserVF;
366 }
367
368 assert(ElementCount::isKnownGT(UserVF, MaxSafeUserVF));
369
370 // Only clamp if the UserVF is not scalable. If the UserVF is scalable, it
371 // is better to ignore the hint and let the compiler choose a suitable VF.
372 if (!UserVF.isScalable()) {
373 LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
374 << " is unsafe, clamping to max safe VF="
375 << MaxSafeFixedVF << ".\n");
376 ORE->emit([&]() {
377 return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
378 TheLoop->getStartLoc(),
379 TheLoop->getHeader())
380 << "User-specified vectorization factor "
381 << ore::NV("UserVectorizationFactor", UserVF)
382 << " is unsafe, clamping to maximum safe vectorization factor "
383 << ore::NV("VectorizationFactor", MaxSafeFixedVF);
384 });
385 return MaxSafeFixedVF;
386 }
387
389 LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
390 << " is ignored because scalable vectors are not "
391 "available.\n");
392 ORE->emit([&]() {
393 return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
394 TheLoop->getStartLoc(),
395 TheLoop->getHeader())
396 << "User-specified vectorization factor "
397 << ore::NV("UserVectorizationFactor", UserVF)
398 << " is ignored because the target does not support scalable "
399 "vectors. The compiler will pick a more suitable value.";
400 });
401 } else {
402 LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
403 << " is unsafe. Ignoring scalable UserVF.\n");
404 ORE->emit([&]() {
405 return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
406 TheLoop->getStartLoc(),
407 TheLoop->getHeader())
408 << "User-specified vectorization factor "
409 << ore::NV("UserVectorizationFactor", UserVF)
410 << " is unsafe. Ignoring the hint to let the compiler pick a "
411 "more suitable value.";
412 });
413 }
414 }
415
416 LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType
417 << " / " << WidestType << " bits.\n");
418
// Compute target-maximized fixed and scalable VFs independently and collect
// them into the result pair.
421 if (auto MaxVF = getMaximizedVFForTarget(
422 MaxTripCount, SmallestType, WidestType, MaxSafeFixedVF, UserIC,
423 FoldTailByMasking, RequiresScalarEpilogue))
424 Result.FixedVF = MaxVF;
425
426 if (auto MaxVF = getMaximizedVFForTarget(
427 MaxTripCount, SmallestType, WidestType, MaxSafeScalableVF, UserIC,
428 FoldTailByMasking, RequiresScalarEpilogue))
429 if (MaxVF.isScalable()) {
430 Result.ScalableVF = MaxVF;
431 LLVM_DEBUG(dbgs() << "LV: Found feasible scalable VF = " << MaxVF
432 << "\n");
433 }
434
435 return Result;
436}
437
// Return the {smallest, widest} scalar element widths (in bits) relevant to
// widening decisions for this loop. NOTE(review): line 439 (the qualified
// function-name line, getSmallestAndWidestTypes per the member list in this
// dump) is missing from this extraction.
438std::pair<unsigned, unsigned>
// MinWidth starts at the maximum unsigned value; MaxWidth at 8 bits.
440 unsigned MinWidth = -1U;
441 unsigned MaxWidth = 8;
442 const DataLayout &DL = F.getDataLayout();
443 // For in-loop reductions, no element types are added to ElementTypesInLoop
444 // if there are no loads/stores in the loop. In this case, check through the
445 // reduction variables to determine the maximum width.
446 if (ElementTypesInLoop.empty() && !Legal->getReductionVars().empty()) {
447 for (const auto &[_, RdxDesc] : Legal->getReductionVars()) {
448 // When finding the min width used by the recurrence we need to account
449 // for casts on the input operands of the recurrence.
450 MinWidth = std::min(
451 MinWidth,
452 std::min(RdxDesc.getMinWidthCastToRecurrenceTypeInBits(),
453 RdxDesc.getRecurrenceType()->getScalarSizeInBits()));
454 MaxWidth = std::max(MaxWidth,
455 RdxDesc.getRecurrenceType()->getScalarSizeInBits());
456 }
457 } else {
// Normal case: min/max over the collected element types' scalar sizes.
458 for (Type *T : ElementTypesInLoop) {
459 MinWidth = std::min<unsigned>(
460 MinWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedValue());
461 MaxWidth = std::max<unsigned>(
462 MaxWidth, DL.getTypeSizeInBits(T->getScalarType()).getFixedValue());
463 }
464 }
465 return {MinWidth, MaxWidth};
466}
467
// Populate ElementTypesInLoop with the types that need widening, visiting
// every instruction of every loop block. NOTE(review): line 468 (the opening
// signature line, collectElementTypesForWidening per the member list in this
// dump), line 482 (the instruction-kind filter condition implied by the
// comment at 481) and line 492 (start of the condition continued at 493) are
// missing from this extraction — confirm against upstream.
469 const SmallPtrSetImpl<const Value *> *ValuesToIgnore) {
470 ElementTypesInLoop.clear();
471 // For each block.
472 for (BasicBlock *BB : TheLoop->blocks()) {
473 // For each instruction in the loop.
474 for (Instruction &I : *BB) {
475 Type *T = I.getType();
476
477 // Skip ignored values.
478 if (ValuesToIgnore && ValuesToIgnore->contains(&I))
479 continue;
480
481 // Only examine Loads, Stores and PHINodes.
483 continue;
484
485 // Examine PHI nodes that are reduction variables. Update the type to
486 // account for the recurrence type.
487 if (auto *PN = dyn_cast<PHINode>(&I)) {
488 if (!Legal->isReductionVariable(PN))
489 continue;
490 const RecurrenceDescriptor &RdxDesc =
491 Legal->getRecurrenceDescriptor(PN);
493 TTI.preferInLoopReduction(RdxDesc.getRecurrenceKind(),
494 RdxDesc.getRecurrenceType()))
495 continue;
496 T = RdxDesc.getRecurrenceType();
497 }
498
499 // Examine the stored values.
500 if (auto *ST = dyn_cast<StoreInst>(&I))
501 T = ST->getValueOperand()->getType();
502
503 assert(T->isSized() &&
504 "Expected the load/store/recurrence type to be sized");
505
506 ElementTypesInLoop.insert(T);
507 }
508 }
509}
510
// Initialize VScaleForTuning: prefer an exact vscale pinned by the function's
// vscale_range attribute (min == max), otherwise fall back to the target's
// tuning value. NOTE(review): line 512 (the guard condition whose bare
// `return;` appears below — presumably checking scalable-vector support) is
// missing from this extraction — confirm against upstream.
511void VFSelectionContext::initializeVScaleForTuning() {
513 return;
514
515 if (F.hasFnAttribute(Attribute::VScaleRange)) {
516 auto Attr = F.getFnAttribute(Attribute::VScaleRange);
517 auto Min = Attr.getVScaleRangeMin();
518 auto Max = Attr.getVScaleRangeMax();
// Only a fully-pinned range (Max present and equal to Min) gives an exact
// vscale to tune for.
519 if (Max && Min == Max) {
520 VScaleForTuning = Max;
521 return;
522 }
523 }
524
525 VScaleForTuning = TTI.getVScaleForTuning();
526}
527
// Use strict in-order (ordered) reductions when the hints forbid reordering
// and the recurrence itself is an ordered FP reduction.
// NOTE(review): the opening signature line (useOrderedReductions per the
// member list in this dump) was dropped by the extraction.
529 const RecurrenceDescriptor &RdxDesc) const {
530 return !Hints->allowReordering() && RdxDesc.isOrdered();
531}
532
// Report (and return true) if vectorizing this loop would require runtime
// checks that are unacceptable when optimizing for size.
// NOTE(review): line 533 (the opening signature line, runtimeChecksRequired
// per the member list in this dump) and lines 538/548/559 (the
// reportVectorizationFailure call openers for each branch) are missing from
// this extraction — confirm against upstream.
534 LLVM_DEBUG(dbgs() << "LV: Performing code size checks.\n");
535
536 Loop *L = const_cast<Loop *>(TheLoop);
// Runtime pointer checks would grow code size; reject under -Os/-Oz.
537 if (Legal->getRuntimePointerChecking()->Need) {
539 "Runtime ptr check is required with -Os/-Oz",
540 "runtime pointer checks needed. Enable vectorization of this "
541 "loop with '#pragma clang loop vectorize(enable)' when "
542 "compiling with -Os/-Oz",
543 "CantVersionLoopWithOptForSize", ORE, L);
544 return true;
545 }
546
// A non-trivial SCEV predicate implies runtime SCEV checks.
547 if (!PSE.getPredicate().isAlwaysTrue()) {
549 "Runtime SCEV check is required with -Os/-Oz",
550 "runtime SCEV checks needed. Enable vectorization of this "
551 "loop with '#pragma clang loop vectorize(enable)' when "
552 "compiling with -Os/-Oz",
553 "CantVersionLoopWithOptForSize", ORE, L);
554 return true;
555 }
556
557 // FIXME: Avoid specializing for stride==1 instead of bailing out.
558 if (!Legal->getLAI()->getSymbolicStrides().empty()) {
560 "Runtime stride check for small trip count",
561 "runtime stride == 1 checks needed. Enable vectorization of "
562 "this loop without such check by compiling with -Os/-Oz",
563 "CantVersionLoopWithOptForSize", ORE, L);
564 return true;
565 }
566
567 return false;
568}
569
// Compute the smallest legal bitwidth per instruction for the whole loop via
// computeMinimumValueSizes and cache the map in MinBWs.
// NOTE(review): the opening signature line (computeMinimalBitwidths per the
// member list in this dump) was dropped by the extraction.
571 MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
572}
573
// Split the loop's reductions into in-loop and out-of-loop ones, recording
// in-loop phis and their operation chains for cost modeling.
// NOTE(review): the opening signature line (collectInLoopReductions per the
// member list in this dump), lines 594-596 (the AnyOf/FindIV kind filter
// implied by the comment at 592) and line 601 (start of the condition
// continued at 602 — presumably including PreferInLoopReductions /
// useOrderedReductions) are missing from this extraction — confirm upstream.
575 // Avoid duplicating work finding in-loop reductions.
576 if (!InLoopReductions.empty())
577 return;
578
579 for (const auto &Reduction : Legal->getReductionVars()) {
580 PHINode *Phi = Reduction.first;
581 const RecurrenceDescriptor &RdxDesc = Reduction.second;
582
583 // Multi-use reductions (e.g., used in FindLastIV patterns) are handled
584 // separately and should not be considered for in-loop reductions.
585 if (RdxDesc.hasUsesOutsideReductionChain())
586 continue;
587
588 // We don't collect reductions that are type promoted (yet).
589 if (RdxDesc.getRecurrenceType() != Phi->getType())
590 continue;
591
592 // In-loop AnyOf and FindIV reductions are not yet supported.
593 RecurKind Kind = RdxDesc.getRecurrenceKind();
597 continue;
598
599 // If the target would prefer this reduction to happen "in-loop", then we
600 // want to record it as such.
602 !TTI.preferInLoopReduction(Kind, Phi->getType()))
603 continue;
604
605 // Check that we can correctly put the reductions into the loop, by
606 // finding the chain of operations that leads from the phi to the loop
607 // exit value.
608 SmallVector<Instruction *, 4> ReductionOperations =
609 RdxDesc.getReductionOpChain(Phi, const_cast<Loop *>(TheLoop));
610 bool InLoop = !ReductionOperations.empty();
611
612 if (InLoop) {
613 InLoopReductions.insert(Phi);
614 // Add the elements to InLoopReductionImmediateChains for cost modelling.
// Each chain op maps back to its predecessor, starting from the phi.
615 Instruction *LastChain = Phi;
616 for (auto *I : ReductionOperations) {
617 InLoopReductionImmediateChains[I] = LastChain;
618 LastChain = I;
619 }
620 }
621 LLVM_DEBUG(dbgs() << "LV: Using " << (InLoop ? "inloop" : "out of loop")
622 << " reduction for phi: " << *Phi << "\n");
623 }
624}
625
626// TODO: we could return a pair of values that specify the max VF and
627// min VF, to be used in `buildVPlans(MinVF, MaxVF)` instead of
628// `buildVPlans(VF, VF)`. We cannot do it because VPLAN at the moment
629// doesn't have a cost model that can choose which plan to execute if
630// more than one is generated.
// Pick the VF used to build outer-loop VPlans: honor a supported UserVF,
// otherwise derive one from the widest type and the target register width.
// NOTE(review): the opening signature line (computeVPlanOuterloopVF per the
// member list in this dump) plus lines 634/640 (the
// reportVectorizationFailure opener and the value returned on failure),
// 648-649 (the RegKind ternary arms), 657/660 (the stress-test guard and the
// overriding VF assignment), 663 (the power-of-two assert opener) and 666
// (the value returned for a scalar VF) are missing from this extraction —
// confirm against upstream.
633 if (UserVF.isScalable() && !supportsScalableVectors()) {
635 "Scalable vectorization requested but not supported by the target",
636 "the scalable user-specified vectorization width for outer-loop "
637 "vectorization cannot be used because the target does not support "
638 "scalable vectors.",
639 "ScalableVFUnfeasible", ORE, TheLoop);
641 }
642
643 ElementCount VF = UserVF;
// No user VF: derive one from register width / widest element type.
644 if (VF.isZero()) {
645 auto [_, WidestType] = getSmallestAndWidestTypes();
646
647 auto RegKind = TTI.enableScalableVectorization()
650
651 TypeSize RegSize = TTI.getRegisterBitWidth(RegKind);
652 unsigned N = RegSize.getKnownMinValue() / WidestType;
653 VF = ElementCount::get(N, RegSize.isScalable());
654 LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");
655
656 // Make sure we have a VF > 1 for stress testing.
658 LLVM_DEBUG(dbgs() << "LV: VPlan stress testing: "
659 << "overriding computed VF.\n");
661 }
662 }
664 "VF needs to be a power of two");
665 if (VF.isScalar())
667 LLVM_DEBUG(dbgs() << "LV: Using " << (!UserVF.isZero() ? "user " : "")
668 << "VF " << VF << " to build VPlans.\n");
669 return FixedScalableVFPair(VF);
670}
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
#define _
loop Loop Strength Reduction
This file defines the LoopVectorizationLegality class.
static cl::opt< bool > ForceTargetSupportsGatherScatterOps("force-target-supports-gather-scatter-ops", cl::init(false), cl::Hidden, cl::desc("Assume the target supports gather/scatter operations (used for " "testing)."))
cl::opt< bool > VPlanBuildOuterloopStressTest
static cl::opt< bool > ForceTargetSupportsScalableVectors("force-target-supports-scalable-vectors", cl::init(false), cl::Hidden, cl::desc("Pretend that scalable vectors are supported, even if the target does " "not support them. This flag should only be used for testing."))
static cl::opt< bool > ConsiderRegPressure("vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden, cl::desc("Discard VFs if their register pressure is too high."))
static cl::opt< bool > UseWiderVFIfCallVariantsPresent("vectorizer-maximize-bandwidth-for-vector-calls", cl::init(true), cl::Hidden, cl::desc("Try wider VFs if they enable the use of vector variants"))
static cl::opt< bool > ForceTargetSupportsMaskedMemoryOps("force-target-supports-masked-memory-ops", cl::init(false), cl::Hidden, cl::desc("Assume the target supports masked memory operations (used for " "testing)."))
Note: This currently only applies to llvm.masked.load and llvm.masked.store.
static cl::opt< bool > MaximizeBandwidth("vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " "will be determined by the smallest type in loop."))
This file provides a LoopVectorizationPlanner class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define LLVM_DEBUG(...)
Definition Debug.h:119
Value * RHS
Value * LHS
LLVM Basic Block Representation.
Definition BasicBlock.h:62
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Diagnostic information for optimization analysis remarks.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool contains(ConstPtrType Ptr) const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
FixedScalableVFPair computeVPlanOuterloopVF(ElementCount UserVF)
Returns a scalable VF to use for outer-loop vectorization if the target supports it and a fixed VF ot...
std::pair< unsigned, unsigned > getSmallestAndWidestTypes() const
bool runtimeChecksRequired()
Check whether vectorization would require runtime checks.
bool isLegalGatherOrScatter(Value *V, ElementCount VF) const
Returns true if the target machine can represent V as a masked gather or scatter operation.
void collectInLoopReductions()
Split reductions into those that happen in the loop, and those that happen outside.
FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC, bool FoldTailByMasking, bool RequiresScalarEpilogue)
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const
Returns true if we should use strict in-order reductions for the given RdxDesc.
bool shouldConsiderRegPressureForVF(ElementCount VF) const
void collectElementTypesForWidening(const SmallPtrSetImpl< const Value * > *ValuesToIgnore=nullptr)
Collect element types in the loop that need widening.
bool isLegalMaskedLoadOrStore(Instruction *I, ElementCount VF) const
Returns true if the target machine supports masked loads or stores for I's data type and alignment.
void computeMinimalBitwidths()
Compute smallest bitwidth each instruction can be represented with.
LLVM Value Representation.
Definition Value.h:75
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr bool isZero() const
Definition TypeSize.h:153
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={})
Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
TargetTransformInfo TTI
RecurKind
These are the kinds of recurrences that we support.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
LLVM_ABI MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
cl::opt< bool > PreferInLoopReductions
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
A class that represents two vectorization factors (initialized with 0 by default).
static FixedScalableVFPair getNone()