LLVM 23.0.0git
LoopAccessAnalysis.cpp
Go to the documentation of this file.
1//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The implementation for the loop memory dependence that was originally
10// developed for the loop vectorizer.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SetVector.h"
22#include "llvm/ADT/SmallSet.h"
40#include "llvm/IR/BasicBlock.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugLoc.h"
46#include "llvm/IR/Dominators.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
52#include "llvm/IR/PassManager.h"
53#include "llvm/IR/Type.h"
54#include "llvm/IR/Value.h"
55#include "llvm/IR/ValueHandle.h"
58#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <utility>
66#include <variant>
67#include <vector>
68
69using namespace llvm;
70using namespace llvm::SCEVPatternMatch;
71
72#define DEBUG_TYPE "loop-accesses"
73
75 VectorizationFactor("force-vector-width", cl::Hidden,
76 cl::desc("Sets the SIMD width. Zero is autoselect."),
79
81VectorizationInterleave("force-vector-interleave", cl::Hidden,
82 cl::desc("Sets the vectorization interleave count. "
83 "Zero is autoselect."),
87
89 "runtime-memory-check-threshold", cl::Hidden,
90 cl::desc("When performing memory disambiguation checks at runtime do not "
91 "generate more than this number of comparisons (default = 8)."),
94
95/// The maximum iterations used to merge memory checks
97 "memory-check-merge-threshold", cl::Hidden,
98 cl::desc("Maximum number of comparisons done when trying to merge "
99 "runtime memory checks. (default = 100)"),
100 cl::init(100));
101
102/// Maximum SIMD width.
103const unsigned VectorizerParams::MaxVectorWidth = 64;
104
105/// We collect dependences up to this threshold.
107 MaxDependences("max-dependences", cl::Hidden,
108 cl::desc("Maximum number of dependences collected by "
109 "loop-access analysis (default = 100)"),
110 cl::init(100));
111
112/// This enables versioning on the strides of symbolically striding memory
113/// accesses in code like the following.
114/// for (i = 0; i < N; ++i)
115/// A[i * Stride1] += B[i * Stride2] ...
116///
117/// Will be roughly translated to
118/// if (Stride1 == 1 && Stride2 == 1) {
119/// for (i = 0; i < N; i+=4)
120/// A[i:i+3] += ...
121/// } else
122/// ...
124 "enable-mem-access-versioning", cl::init(true), cl::Hidden,
125 cl::desc("Enable symbolic stride memory access versioning"));
126
127/// Enable store-to-load forwarding conflict detection. This option can
128/// be disabled for correctness testing.
130 "store-to-load-forwarding-conflict-detection", cl::Hidden,
131 cl::desc("Enable conflict detection in loop-access analysis"),
132 cl::init(true));
133
135 "max-forked-scev-depth", cl::Hidden,
136 cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
137 cl::init(5));
138
140 "laa-speculate-unit-stride", cl::Hidden,
141 cl::desc("Speculate that non-constant strides are unit in LAA"),
142 cl::init(true));
143
145 "hoist-runtime-checks", cl::Hidden,
146 cl::desc(
147 "Hoist inner loop runtime memory checks to outer loop if possible"),
150
152 return ::VectorizationInterleave.getNumOccurrences() > 0;
153}
154
156 const DenseMap<Value *, const SCEV *> &PtrToStride,
157 Value *Ptr) {
158 const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
159
160 // If there is an entry in the map return the SCEV of the pointer with the
161 // symbolic stride replaced by one.
162 const SCEV *StrideSCEV = PtrToStride.lookup(Ptr);
163 if (!StrideSCEV)
164 // For a non-symbolic stride, just return the original expression.
165 return OrigSCEV;
166
167 // Note: This assert is both overly strong and overly weak. The actual
168 // invariant here is that StrideSCEV should be loop invariant. The only
169 // such invariant strides we happen to speculate right now are unknowns
170 // and thus this is a reasonable proxy of the actual invariant.
171 assert(isa<SCEVUnknown>(StrideSCEV) && "shouldn't be in map");
172
173 ScalarEvolution *SE = PSE.getSE();
174 const SCEV *CT = SE->getOne(StrideSCEV->getType());
175 PSE.addPredicate(*SE->getEqualPredicate(StrideSCEV, CT));
176 const SCEV *Expr = PSE.getSCEV(Ptr);
177
178 LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
179 << " by: " << *Expr << "\n");
180 return Expr;
181}
182
184 unsigned Index, const RuntimePointerChecking &RtCheck)
185 : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
186 AddressSpace(RtCheck.Pointers[Index]
187 .PointerValue->getType()
189 NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
190 Members.push_back(Index);
191}
192
193/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
194/// return nullptr. \p A and \p B must have the same type.
195static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
196 ScalarEvolution &SE) {
197 if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B))
198 return nullptr;
199 return SE.getAddExpr(A, B);
200}
201
202/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
203/// return nullptr. \p A and \p B must have the same type.
204static const SCEV *mulSCEVNoOverflow(const SCEV *A, const SCEV *B,
205 ScalarEvolution &SE) {
206 if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B))
207 return nullptr;
208 return SE.getMulExpr(A, B);
209}
210
211/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
212/// \p MaxBTC is guaranteed inbounds of the accessed object.
214 const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize,
216 AssumptionCache *AC,
217 std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
218 auto *PointerBase = SE.getPointerBase(AR->getStart());
219 auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
220 if (!StartPtr)
221 return false;
222 const Loop *L = AR->getLoop();
223 bool CheckForNonNull;
224 Value *StartPtrV = StartPtr->getValue();
225 // We can ignore frees, as the fact that an object of a certain size existed
226 // at the location *at some point* is sufficient to derive the nowrap fact.
227 uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
228 DL, CheckForNonNull, /*CanBeFreed=*/nullptr);
229
230 if (DerefBytes && CheckForNonNull)
231 return false;
232
233 const SCEV *Step = AR->getStepRecurrence(SE);
234 Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
235 const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
236
237 // Check if we have a suitable dereferencable assumption we can use.
238 Instruction *CtxI = &*L->getHeader()->getFirstNonPHIIt();
239 if (BasicBlock *LoopPred = L->getLoopPredecessor()) {
240 if (isa<UncondBrInst, CondBrInst>(LoopPred->getTerminator()))
241 CtxI = LoopPred->getTerminator();
242 }
244 StartPtrV, Attribute::Dereferenceable, *AC,
245 [&](RetainedKnowledge RK, Instruction *Assume, auto) {
246 if (!isValidAssumeForContext(Assume, CtxI, DT))
247 return false;
248 const SCEV *DerefRKSCEV = SE.getSCEV(RK.IRArgValue);
249 Type *CommonTy =
250 SE.getWiderType(DerefBytesSCEV->getType(), DerefRKSCEV->getType());
251 DerefBytesSCEV = SE.getNoopOrZeroExtend(DerefBytesSCEV, CommonTy);
252 DerefRKSCEV = SE.getNoopOrZeroExtend(DerefRKSCEV, CommonTy);
253 DerefBytesSCEV = SE.getUMaxExpr(DerefBytesSCEV, DerefRKSCEV);
254 // Continue with other assumptions.
255 return false;
256 });
257
258 if (DerefBytesSCEV->isZero())
259 return false;
260
261 bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
262 if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
263 return false;
264
265 Step = SE.getNoopOrSignExtend(Step, WiderTy);
266 MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
267
268 // For the computations below, make sure they don't unsigned wrap.
269 if (!SE.isKnownPredicate(CmpInst::ICMP_UGE, AR->getStart(), StartPtr))
270 return false;
271 const SCEV *StartOffset = SE.getNoopOrZeroExtend(
272 SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
273
274 if (!LoopGuards)
275 LoopGuards.emplace(ScalarEvolution::LoopGuards::collect(AR->getLoop(), SE));
276 MaxBTC = SE.applyLoopGuards(MaxBTC, *LoopGuards);
277
278 const SCEV *OffsetAtLastIter =
279 mulSCEVNoOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
280 if (!OffsetAtLastIter) {
281 // Re-try with constant max backedge-taken count if using the symbolic one
282 // failed.
283 MaxBTC = SE.getConstantMaxBackedgeTakenCount(AR->getLoop());
284 if (isa<SCEVCouldNotCompute>(MaxBTC))
285 return false;
286 MaxBTC = SE.getNoopOrZeroExtend(
287 MaxBTC, WiderTy);
288 OffsetAtLastIter =
289 mulSCEVNoOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
290 if (!OffsetAtLastIter)
291 return false;
292 }
293
294 const SCEV *OffsetEndBytes = addSCEVNoOverflow(
295 OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
296 if (!OffsetEndBytes)
297 return false;
298
299 if (IsKnownNonNegative) {
300 // For positive steps, check if
301 // (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
302 // while making sure none of the computations unsigned wrap themselves.
303 const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
304 if (!EndBytes)
305 return false;
306
307 DerefBytesSCEV = SE.applyLoopGuards(DerefBytesSCEV, *LoopGuards);
308 return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
309 }
310
311 // For negative steps check if
312 // * StartOffset >= (MaxBTC * Step + EltSize)
313 // * StartOffset <= DerefBytes.
314 assert(SE.isKnownNegative(Step) && "must be known negative");
315 return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
316 SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
317}
318
319std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
320 const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
321 const SCEV *MaxBTC, ScalarEvolution *SE,
322 DenseMap<std::pair<const SCEV *, const SCEV *>,
323 std::pair<const SCEV *, const SCEV *>> *PointerBounds,
325 std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
326 auto &DL = Lp->getHeader()->getDataLayout();
327 Type *IdxTy = DL.getIndexType(PtrExpr->getType());
328 const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
329
330 // Delegate to the SCEV-based overload, passing through the cache.
331 return getStartAndEndForAccess(Lp, PtrExpr, EltSizeSCEV, BTC, MaxBTC, SE,
332 PointerBounds, DT, AC, LoopGuards);
333}
334
335std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
336 const Loop *Lp, const SCEV *PtrExpr, const SCEV *EltSizeSCEV,
337 const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE,
338 DenseMap<std::pair<const SCEV *, const SCEV *>,
339 std::pair<const SCEV *, const SCEV *>> *PointerBounds,
341 std::optional<ScalarEvolution::LoopGuards> &LoopGuards) {
342 std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
343 if (PointerBounds) {
344 auto [Iter, Ins] = PointerBounds->insert(
345 {{PtrExpr, EltSizeSCEV},
346 {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
347 if (!Ins)
348 return Iter->second;
349 PtrBoundsPair = &Iter->second;
350 }
351
352 const SCEV *ScStart;
353 const SCEV *ScEnd;
354
355 auto &DL = Lp->getHeader()->getDataLayout();
356 if (SE->isLoopInvariant(PtrExpr, Lp)) {
357 ScStart = ScEnd = PtrExpr;
358 } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
359 ScStart = AR->getStart();
360 if (!isa<SCEVCouldNotCompute>(BTC))
361 // Evaluating AR at an exact BTC is safe: LAA separately checks that
362 // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
363 // the loop either triggers UB when executing a memory access with a
364 // poison pointer or the wrapping/poisoned pointer is not used.
365 ScEnd = AR->evaluateAtIteration(BTC, *SE);
366 else {
367 // Evaluating AR at MaxBTC may wrap and create an expression that is less
368 // than the start of the AddRec due to wrapping (for example consider
369 // MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
370 // will get incremented by EltSize before returning, so this effectively
371 // sets ScEnd to the maximum unsigned value for the type. Note that LAA
372 // separately checks that accesses cannot not wrap, so unsigned max
373 // represents an upper bound.
374 if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
375 DT, AC, LoopGuards)) {
376 ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
377 } else {
378 ScEnd = SE->getAddExpr(
379 SE->getNegativeSCEV(EltSizeSCEV),
382 AR->getType())));
383 }
384 }
385 const SCEV *Step = AR->getStepRecurrence(*SE);
386
387 // For expressions with negative step, the upper bound is ScStart and the
388 // lower bound is ScEnd.
389 if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
390 if (CStep->getValue()->isNegative())
391 std::swap(ScStart, ScEnd);
392 } else {
393 // Fallback case: the step is not constant, but we can still
394 // get the upper and lower bounds of the interval by using min/max
395 // expressions.
396 ScStart = SE->getUMinExpr(ScStart, ScEnd);
397 ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
398 }
399 } else
400 return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};
401
402 assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
403 assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
404
405 // Add the size of the pointed element to ScEnd.
406 ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
407
408 std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
409 if (PointerBounds)
410 *PtrBoundsPair = Res;
411 return Res;
412}
413
414/// Calculate Start and End points of memory access using
415/// getStartAndEndForAccess.
416void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
417 Type *AccessTy, bool WritePtr,
418 unsigned DepSetId, unsigned ASId,
420 bool NeedsFreeze) {
421 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
422 const SCEV *BTC = PSE.getBackedgeTakenCount();
423 const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
424 Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
425 &DC.getPointerBounds(), DC.getDT(), DC.getAC(), LoopGuards);
427 !isa<SCEVCouldNotCompute>(ScEnd) &&
428 "must be able to compute both start and end expressions");
429 Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
430 NeedsFreeze);
431}
432
433bool RuntimePointerChecking::tryToCreateDiffCheck(
434 const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
435 // If either group contains multiple different pointers, bail out.
436 // TODO: Support multiple pointers by using the minimum or maximum pointer,
437 // depending on src & sink.
438 if (CGI.Members.size() != 1 || CGJ.Members.size() != 1)
439 return false;
440
441 const PointerInfo *Src = &Pointers[CGI.Members[0]];
442 const PointerInfo *Sink = &Pointers[CGJ.Members[0]];
443
444 // If either pointer is read and written, multiple checks may be needed. Bail
445 // out.
446 if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
447 !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty())
448 return false;
449
450 ArrayRef<unsigned> AccSrc =
451 DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
452 ArrayRef<unsigned> AccSink =
453 DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
454 // If either pointer is accessed multiple times, there may not be a clear
455 // src/sink relation. Bail out for now.
456 if (AccSrc.size() != 1 || AccSink.size() != 1)
457 return false;
458
459 // If the sink is accessed before src, swap src/sink.
460 if (AccSink[0] < AccSrc[0])
461 std::swap(Src, Sink);
462
463 const SCEVConstant *Step;
464 const SCEV *SrcStart;
465 const SCEV *SinkStart;
466 const Loop *InnerLoop = DC.getInnermostLoop();
467 if (!match(Src->Expr,
469 m_SpecificLoop(InnerLoop))) ||
470 !match(Sink->Expr,
472 m_SpecificLoop(InnerLoop))))
473 return false;
474
476 DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
478 DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
479 Type *SrcTy = getLoadStoreType(SrcInsts[0]);
480 Type *DstTy = getLoadStoreType(SinkInsts[0]);
482 return false;
483
484 const DataLayout &DL = InnerLoop->getHeader()->getDataLayout();
485 unsigned AllocSize =
486 std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
487
488 // Only matching constant steps matching the AllocSize are supported at the
489 // moment. This simplifies the difference computation. Can be extended in the
490 // future.
491 if (Step->getAPInt().abs() != AllocSize)
492 return false;
493
494 // When counting down, the dependence distance needs to be swapped.
495 if (Step->getValue()->isNegative())
496 std::swap(SinkStart, SrcStart);
497
498 const SCEV *SinkStartInt = SE->getPtrToAddrExpr(SinkStart);
499 const SCEV *SrcStartInt = SE->getPtrToAddrExpr(SrcStart);
500 if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
501 isa<SCEVCouldNotCompute>(SrcStartInt))
502 return false;
503
504 // If the start values for both Src and Sink also vary according to an outer
505 // loop, then it's probably better to avoid creating diff checks because
506 // they may not be hoisted. We should instead let llvm::addRuntimeChecks
507 // do the expanded full range overlap checks, which can be hoisted.
508 if (HoistRuntimeChecks && InnerLoop->getParentLoop() &&
509 isa<SCEVAddRecExpr>(SinkStartInt) && isa<SCEVAddRecExpr>(SrcStartInt)) {
510 auto *SrcStartAR = cast<SCEVAddRecExpr>(SrcStartInt);
511 auto *SinkStartAR = cast<SCEVAddRecExpr>(SinkStartInt);
512 const Loop *StartARLoop = SrcStartAR->getLoop();
513 if (StartARLoop == SinkStartAR->getLoop() &&
514 StartARLoop == InnerLoop->getParentLoop() &&
515 // If the diff check would already be loop invariant (due to the
516 // recurrences being the same), then we prefer to keep the diff checks
517 // because they are cheaper.
518 SrcStartAR->getStepRecurrence(*SE) !=
519 SinkStartAR->getStepRecurrence(*SE)) {
520 LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "
521 "cannot be hoisted out of the outer loop\n");
522 return false;
523 }
524 }
525
526 LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
527 << "SrcStart: " << *SrcStartInt << '\n'
528 << "SinkStartInt: " << *SinkStartInt << '\n');
529 DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
530 Src->NeedsFreeze || Sink->NeedsFreeze);
531 return true;
532}
533
535 SmallVector<RuntimePointerCheck, 4> Checks;
536
537 for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
538 for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
541
542 if (needsChecking(CGI, CGJ)) {
543 CanUseDiffCheck = CanUseDiffCheck && tryToCreateDiffCheck(CGI, CGJ);
544 Checks.emplace_back(&CGI, &CGJ);
545 }
546 }
547 }
548 return Checks;
549}
550
553 assert(Checks.empty() && "Checks is not empty");
554 groupChecks(DepCands);
555 Checks = generateChecks();
556}
557
559 const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
560 for (const auto &I : M.Members)
561 for (const auto &J : N.Members)
562 if (needsChecking(I, J))
563 return true;
564 return false;
565}
566
567/// Compare \p I and \p J and return the minimum.
568/// Return nullptr in case we couldn't find an answer.
569static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
570 ScalarEvolution *SE) {
571 std::optional<APInt> Diff = SE->computeConstantDifference(J, I);
572 if (!Diff)
573 return nullptr;
574 return Diff->isNegative() ? J : I;
575}
576
578 unsigned Index, const RuntimePointerChecking &RtCheck) {
579 return addPointer(
580 Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
581 RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
582 RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
583}
584
585bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
586 const SCEV *End, unsigned AS,
587 bool NeedsFreeze,
588 ScalarEvolution &SE) {
589 assert(AddressSpace == AS &&
590 "all pointers in a checking group must be in the same address space");
591
592 // Compare the starts and ends with the known minimum and maximum
593 // of this set. We need to know how we compare against the min/max
594 // of the set in order to be able to emit memchecks.
595 const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
596 if (!Min0)
597 return false;
598
599 const SCEV *Min1 = getMinFromExprs(End, High, &SE);
600 if (!Min1)
601 return false;
602
603 // Update the low bound expression if we've found a new min value.
604 if (Min0 == Start)
605 Low = Start;
606
607 // Update the high bound expression if we've found a new max value.
608 if (Min1 != End)
609 High = End;
610
611 Members.push_back(Index);
612 this->NeedsFreeze |= NeedsFreeze;
613 return true;
614}
615
616void RuntimePointerChecking::groupChecks(
618 // We build the groups from dependency candidates equivalence classes
619 // because:
620 // - We know that pointers in the same equivalence class share
621 // the same underlying object and therefore there is a chance
622 // that we can compare pointers
623 // - We wouldn't be able to merge two pointers for which we need
624 // to emit a memcheck. The classes in DepCands are already
625 // conveniently built such that no two pointers in the same
626 // class need checking against each other.
627
628 // We use the following (greedy) algorithm to construct the groups
629 // For every pointer in the equivalence class:
630 // For each existing group:
631 // - if the difference between this pointer and the min/max bounds
632 // of the group is a constant, then make the pointer part of the
633 // group and update the min/max bounds of that group as required.
634
635 CheckingGroups.clear();
636
637 // If we need to check two pointers to the same underlying object
638 // with a non-constant difference, we shouldn't perform any pointer
639 // grouping with those pointers. This is because we can easily get
640 // into cases where the resulting check would return false, even when
641 // the accesses are safe.
642 //
643 // The following example shows this:
644 // for (i = 0; i < 1000; ++i)
645 // a[5000 + i * m] = a[i] + a[i + 9000]
646 //
647 // Here grouping gives a check of (5000, 5000 + 1000 * m) against
648 // (0, 10000) which is always false. However, if m is 1, there is no
649 // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
650 // us to perform an accurate check in this case.
651 //
652 // In the above case, we have a non-constant distance and an Unknown
653 // dependence between accesses to the same underlying object, and could retry
654 // with runtime checks without dependency information being available. In this
655 // case we will use the fallback path and create separate checking groups for
656 // accesses not present in DepCands.
657
658 unsigned TotalComparisons = 0;
659
661 for (unsigned Index = 0; Index < Pointers.size(); ++Index)
662 PositionMap[Pointers[Index].PointerValue].push_back(Index);
663
664 // We need to keep track of what pointers we've already seen so we
665 // don't process them twice.
667
668 // Go through all equivalence classes, get the "pointer check groups"
669 // and add them to the overall solution. We use the order in which accesses
670 // appear in 'Pointers' to enforce determinism.
671 for (unsigned I = 0; I < Pointers.size(); ++I) {
672 // We've seen this pointer before, and therefore already processed
673 // its equivalence class.
674 if (Seen.contains(I))
675 continue;
676
678 Pointers[I].IsWritePtr);
679
680 // If there is no entry in the dependency partition, there are no potential
681 // accesses to merge; simply add a new pointer checking group.
682 if (!DepCands.contains(Access)) {
683 CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
684 continue;
685 }
686
688
689 // Because DepCands is constructed by visiting accesses in the order in
690 // which they appear in alias sets (which is deterministic) and the
691 // iteration order within an equivalence class member is only dependent on
692 // the order in which unions and insertions are performed on the
693 // equivalence class, the iteration order is deterministic.
694 for (auto M : DepCands.members(Access)) {
695 auto PointerI = PositionMap.find(M.getPointer());
696 // If we can't find the pointer in PositionMap that means we can't
697 // generate a memcheck for it.
698 if (PointerI == PositionMap.end())
699 continue;
700 for (unsigned Pointer : PointerI->second) {
701 bool Merged = false;
702 // Mark this pointer as seen.
703 Seen.insert(Pointer);
704
705 // Go through all the existing sets and see if we can find one
706 // which can include this pointer.
707 for (RuntimeCheckingPtrGroup &Group : Groups) {
708 // Don't perform more than a certain amount of comparisons.
709 // This should limit the cost of grouping the pointers to something
710 // reasonable. If we do end up hitting this threshold, the algorithm
711 // will create separate groups for all remaining pointers.
712 if (TotalComparisons > MemoryCheckMergeThreshold)
713 break;
714
715 TotalComparisons++;
716
717 if (Group.addPointer(Pointer, *this)) {
718 Merged = true;
719 break;
720 }
721 }
722
723 if (!Merged)
724 // We couldn't add this pointer to any existing set or the threshold
725 // for the number of comparisons has been reached. Create a new group
726 // to hold the current pointer.
727 Groups.emplace_back(Pointer, *this);
728 }
729 }
730
731 // We've computed the grouped checks for this partition.
732 // Save the results and continue with the next one.
734 }
735}
736
738 const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
739 unsigned PtrIdx2) {
740 return (PtrToPartition[PtrIdx1] != -1 &&
741 PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
742}
743
744bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
745 const PointerInfo &PointerI = Pointers[I];
746 const PointerInfo &PointerJ = Pointers[J];
747
748 // No need to check if two readonly pointers intersect.
749 if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
750 return false;
751
752 // Only need to check pointers between two different dependency sets.
753 if (PointerI.DependencySetId == PointerJ.DependencySetId)
754 return false;
755
756 // Only need to check pointers in the same alias set.
757 return PointerI.AliasSetId == PointerJ.AliasSetId;
758}
759
760/// Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.
764 for (const auto &[Idx, CG] : enumerate(CheckingGroups))
765 PtrIndices[&CG] = Idx;
766 return PtrIndices;
767}
768
771 unsigned Depth) const {
772 unsigned N = 0;
773 auto PtrIndices = getPtrToIdxMap(CheckingGroups);
774 for (const auto &[Check1, Check2] : Checks) {
775 const auto &First = Check1->Members, &Second = Check2->Members;
776 OS.indent(Depth) << "Check " << N++ << ":\n";
777 OS.indent(Depth + 2) << "Comparing group GRP" << PtrIndices.at(Check1)
778 << ":\n";
779 for (unsigned K : First)
780 OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
781 OS.indent(Depth + 2) << "Against group GRP" << PtrIndices.at(Check2)
782 << ":\n";
783 for (unsigned K : Second)
784 OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
785 }
786}
787
789
790 OS.indent(Depth) << "Run-time memory checks:\n";
791 printChecks(OS, Checks, Depth);
792
793 OS.indent(Depth) << "Grouped accesses:\n";
794 auto PtrIndices = getPtrToIdxMap(CheckingGroups);
795 for (const auto &CG : CheckingGroups) {
796 OS.indent(Depth + 2) << "Group GRP" << PtrIndices.at(&CG) << ":\n";
797 OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
798 << ")\n";
799 for (unsigned Member : CG.Members) {
800 OS.indent(Depth + 6) << "Member: " << *Pointers[Member].Expr << "\n";
801 }
802 }
803}
804
805namespace {
806
807/// Analyses memory accesses in a loop.
808///
809/// Checks whether run time pointer checks are needed and builds sets for data
810/// dependence checking.
811class AccessAnalysis {
812public:
813 using MemAccessInfo =
814 PointerIntPair<Value * /* AccessPtr */, 1, bool /* IsWrite */>;
815
816 AccessAnalysis(const Loop *TheLoop, AAResults *AA, const LoopInfo *LI,
819 SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
820 : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DT(DT), DepCands(DA),
821 PSE(PSE), LoopAliasScopes(LoopAliasScopes) {
822 // We're analyzing dependences across loop iterations.
823 BAA.enableCrossIterationMode();
824 }
825
826 /// Register a load and whether it is only read from.
827 void addLoad(const MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
828 Value *Ptr = const_cast<Value *>(Loc.Ptr);
829 AST.add(adjustLoc(Loc));
830 Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
831 if (IsReadOnly)
832 ReadOnlyPtr.insert(Ptr);
833 }
834
835 /// Register a store.
836 void addStore(const MemoryLocation &Loc, Type *AccessTy) {
837 Value *Ptr = const_cast<Value *>(Loc.Ptr);
838 AST.add(adjustLoc(Loc));
839 Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
840 }
841
842 /// Check if we can emit a run-time no-alias check for \p Access.
843 ///
844 /// Returns true if we can emit a run-time no alias check for \p Access.
845 /// If we can check this access, this also adds it to a dependence set and
846 /// adds a run-time to check for it to \p RtCheck. If \p Assume is true,
847 /// we will attempt to use additional run-time checks in order to get
848 /// the bounds of the pointer.
849 bool createCheckForAccess(RuntimePointerChecking &RtCheck,
850 MemAccessInfo Access, Type *AccessTy,
851 const DenseMap<Value *, const SCEV *> &Strides,
852 DenseMap<Value *, unsigned> &DepSetId,
853 Loop *TheLoop, unsigned &RunningDepId,
854 unsigned ASId, bool Assume);
855
856 /// Check whether we can check the pointers at runtime for
857 /// non-intersection.
858 ///
859 /// Returns true if we need no check or if we do and we can generate them
860 /// (i.e. the pointers have computable bounds). A return value of false means
861 /// we couldn't analyze and generate runtime checks for all pointers in the
862 /// loop, but if \p AllowPartial is set then we will have checks for those
863 /// pointers we could analyze. \p DepChecker is used to remove unknown
864 /// dependences from DepCands.
865 bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop,
866 const DenseMap<Value *, const SCEV *> &Strides,
867 Value *&UncomputablePtr, bool AllowPartial,
868 const MemoryDepChecker &DepChecker);
869
870 /// Goes over all memory accesses, checks whether a RT check is needed
871 /// and builds sets of dependent accesses.
872 void buildDependenceSets();
873
874 /// Initial processing of memory accesses determined that we need to
875 /// perform dependency checking.
876 ///
877 /// Note that this can later be cleared if we retry memcheck analysis without
878 /// dependency checking (i.e. ShouldRetryWithRuntimeChecks).
879 bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }
880
881 /// We decided that no dependence analysis would be used. Reset the state.
882 void resetDepChecks(MemoryDepChecker &DepChecker) {
883 CheckDeps.clear();
884 DepChecker.clearDependences();
885 }
886
887 ArrayRef<MemAccessInfo> getDependenciesToCheck() const { return CheckDeps; }
888
889private:
890 using PtrAccessMap = MapVector<MemAccessInfo, SmallSetVector<Type *, 1>>;
891
892 /// Adjust the MemoryLocation so that it represents accesses to this
893 /// location across all iterations, rather than a single one.
894 MemoryLocation adjustLoc(MemoryLocation Loc) const {
895 // The accessed location varies within the loop, but remains within the
896 // underlying object.
898 Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
899 Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
900 return Loc;
901 }
902
903 /// Drop alias scopes that are only valid within a single loop iteration.
904 MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
905 if (!ScopeList)
906 return nullptr;
907
908 // For the sake of simplicity, drop the whole scope list if any scope is
909 // iteration-local.
910 if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
911 return LoopAliasScopes.contains(cast<MDNode>(Scope));
912 }))
913 return nullptr;
914
915 return ScopeList;
916 }
917
918 /// Map of all accesses. Values are the types used to access memory pointed to
919 /// by the pointer.
920 PtrAccessMap Accesses;
921
922 /// The loop being checked.
923 const Loop *TheLoop;
924
925 /// List of accesses that need a further dependence check.
927
928 /// Set of pointers that are read only.
929 SmallPtrSet<Value*, 16> ReadOnlyPtr;
930
931 /// Batched alias analysis results.
932 BatchAAResults BAA;
933
934 /// An alias set tracker to partition the access set by underlying object and
935 //intrinsic property (such as TBAA metadata).
936 AliasSetTracker AST;
937
938 /// The LoopInfo of the loop being checked.
939 const LoopInfo *LI;
940
941 /// The dominator tree of the function.
942 DominatorTree &DT;
943
944 /// Sets of potentially dependent accesses - members of one set share an
945 /// underlying pointer. The set "CheckDeps" identfies which sets really need a
946 /// dependence check.
948
949 /// Initial processing of memory accesses determined that we may need
950 /// to add memchecks. Perform the analysis to determine the necessary checks.
951 ///
952 /// Note that, this is different from isDependencyCheckNeeded. When we retry
953 /// memcheck analysis without dependency checking
954 /// (i.e. ShouldRetryWithRuntimeChecks), isDependencyCheckNeeded is
955 /// cleared while this remains set if we have potentially dependent accesses.
956 bool IsRTCheckAnalysisNeeded = false;
957
958 /// The SCEV predicate containing all the SCEV-related assumptions.
959 PredicatedScalarEvolution &PSE;
960
961 DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;
962
963 /// Alias scopes that are declared inside the loop, and as such not valid
964 /// across iterations.
965 SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
966};
967
968} // end anonymous namespace
969
970std::optional<int64_t>
972 Type *AccessTy, Value *Ptr,
974 if (isa<ScalableVectorType>(AccessTy)) {
975 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
976 << "\n");
977 return std::nullopt;
978 }
979
980 // The access function must stride over the innermost loop.
981 if (Lp != AR->getLoop()) {
982 LLVM_DEBUG({
983 dbgs() << "LAA: Bad stride - Not striding over innermost loop ";
984 if (Ptr)
985 dbgs() << *Ptr << " ";
986
987 dbgs() << "SCEV: " << *AR << "\n";
988 });
989 return std::nullopt;
990 }
991
992 // Check the step is constant.
993 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
994
995 // Calculate the pointer stride and check if it is constant.
996 const APInt *APStepVal;
997 if (!match(Step, m_scev_APInt(APStepVal))) {
998 LLVM_DEBUG({
999 dbgs() << "LAA: Bad stride - Not a constant strided ";
1000 if (Ptr)
1001 dbgs() << *Ptr << " ";
1002 dbgs() << "SCEV: " << *AR << "\n";
1003 });
1004 return std::nullopt;
1005 }
1006
1007 const auto &DL = Lp->getHeader()->getDataLayout();
1008 TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
1009 int64_t Size = AllocSize.getFixedValue();
1010
1011 // Huge step value - give up.
1012 std::optional<int64_t> StepVal = APStepVal->trySExtValue();
1013 if (!StepVal)
1014 return std::nullopt;
1015
1016 // Strided access.
1017 return *StepVal % Size ? std::nullopt : std::make_optional(*StepVal / Size);
1018}
1019
1020/// Check whether \p AR is a non-wrapping AddRec. If \p Ptr is not nullptr, use
1021/// information from the IR pointer value to determine no-wrap. If \p Predicates
1022/// is not nullptr add no-wrap assumptions if needed.
1023static bool
1025 Type *AccessTy, const Loop *L, const DominatorTree &DT,
1026 std::optional<int64_t> Stride = std::nullopt,
1027 SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr) {
1028 // FIXME: This should probably only return true for NUW.
1029 if (any(AR->getNoWrapFlags(SCEV::NoWrapMask)))
1030 return true;
1031
1033 return true;
1034
1035 // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
1036 // the distance between the previously accessed location and the wrapped
1037 // location will be larger than half the pointer index type space. In that
1038 // case, the GEP would be poison and any memory access dependent on it would
1039 // be immediate UB when executed.
1041 GEP && GEP->hasNoUnsignedSignedWrap()) {
1042 // For the above reasoning to apply, the pointer must be dereferenced in
1043 // every iteration.
1044 if (L->getHeader() == L->getLoopLatch() ||
1045 any_of(GEP->users(), [L, &DT, GEP](User *U) {
1046 if (getLoadStorePointerOperand(U) != GEP)
1047 return false;
1048 BasicBlock *UserBB = cast<Instruction>(U)->getParent();
1049 if (!L->contains(UserBB))
1050 return false;
1051 return !LoopAccessInfo::blockNeedsPredication(UserBB, L, &DT);
1052 }))
1053 return true;
1054 }
1055
1056 if (!Stride)
1057 Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE);
1058 if (Stride) {
1059 // If the null pointer is undefined, then a access sequence which would
1060 // otherwise access it can be assumed not to unsigned wrap. Note that this
1061 // assumes the object in memory is aligned to the natural alignment.
1062 unsigned AddrSpace = AR->getType()->getPointerAddressSpace();
1063 if (!NullPointerIsDefined(L->getHeader()->getParent(), AddrSpace) &&
1064 (Stride == 1 || Stride == -1))
1065 return true;
1066 }
1067
1068 if (Ptr && Predicates) {
1069 ScalarEvolution &SE = *PSE.getSE();
1073 Predicates->push_back(SE.getWrapPredicate(AR, Flags));
1074 LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
1075 << "LAA: Pointer: " << *Ptr << "\n"
1076 << "LAA: SCEV: " << *AR << "\n"
1077 << "LAA: Added an overflow assumption\n");
1078 return true;
1079 }
1080
1081 return false;
1082}
1083
1084static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
1085 function_ref<void(Value *)> AddPointer) {
1087 SmallVector<Value *> WorkList;
1088 WorkList.push_back(StartPtr);
1089
1090 while (!WorkList.empty()) {
1091 Value *Ptr = WorkList.pop_back_val();
1092 if (!Visited.insert(Ptr).second)
1093 continue;
1094 auto *PN = dyn_cast<PHINode>(Ptr);
1095 // SCEV does not look through non-header PHIs inside the loop. Such phis
1096 // can be analyzed by adding separate accesses for each incoming pointer
1097 // value.
1098 if (PN && InnermostLoop.contains(PN->getParent()) &&
1099 PN->getParent() != InnermostLoop.getHeader()) {
1100 llvm::append_range(WorkList, PN->incoming_values());
1101 } else
1102 AddPointer(Ptr);
1103 }
1104}
1105
1106// Walk back through the IR for a pointer, looking for a select like the
1107// following:
1108//
1109// %offset = select i1 %cmp, i64 %a, i64 %b
1110// %addr = getelementptr double, double* %base, i64 %offset
1111// %ld = load double, double* %addr, align 8
1112//
1113// We won't be able to form a single SCEVAddRecExpr from this since the
1114// address for each loop iteration depends on %cmp. We could potentially
1115// produce multiple valid SCEVAddRecExprs, though, and check all of them for
1116// memory safety/aliasing if needed.
1117//
1118// If we encounter some IR we don't yet handle, or something obviously fine
1119// like a constant, then we just add the SCEV for that term to the list passed
1120// in by the caller. If we have a node that may potentially yield a valid
1121// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
1122// ourselves before adding to the list.
1124 ScalarEvolution *SE, const Loop *L, Value *Ptr,
1126 unsigned Depth) {
1127 // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
1128 // we've exceeded our limit on recursion, just return whatever we have
1129 // regardless of whether it can be used for a forked pointer or not, along
1130 // with an indication of whether it might be a poison or undef value.
1131 const SCEV *Scev = SE->getSCEV(Ptr);
1132 if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
1133 !isa<Instruction>(Ptr) || Depth == 0) {
1134 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1135 return;
1136 }
1137
1138 Depth--;
1139
1140 auto UndefPoisonCheck = [](PointerIntPair<const SCEV *, 1, bool> S) {
1141 return get<1>(S);
1142 };
1143
1144 auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) {
1145 switch (Opcode) {
1146 case Instruction::Add:
1147 return SE->getAddExpr(L, R);
1148 case Instruction::Sub:
1149 return SE->getMinusSCEV(L, R);
1150 default:
1151 llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");
1152 }
1153 };
1154
1156 unsigned Opcode = I->getOpcode();
1157 switch (Opcode) {
1158 case Instruction::GetElementPtr: {
1159 auto *GEP = cast<GetElementPtrInst>(I);
1160 Type *SourceTy = GEP->getSourceElementType();
1161 // We only handle base + single offset GEPs here for now.
1162 // Not dealing with preexisting gathers yet, so no vectors.
1163 if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
1164 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP));
1165 break;
1166 }
1169 findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
1170 findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
1171
1172 // See if we need to freeze our fork...
1173 bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
1174 any_of(OffsetScevs, UndefPoisonCheck);
1175
1176 // Check that we only have a single fork, on either the base or the offset.
1177 // Copy the SCEV across for the one without a fork in order to generate
1178 // the full SCEV for both sides of the GEP.
1179 if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
1180 BaseScevs.push_back(BaseScevs[0]);
1181 else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
1182 OffsetScevs.push_back(OffsetScevs[0]);
1183 else {
1184 ScevList.emplace_back(Scev, NeedsFreeze);
1185 break;
1186 }
1187
1188 Type *IntPtrTy = SE->getEffectiveSCEVType(GEP->getPointerOperandType());
1189
1190 // Find the size of the type being pointed to. We only have a single
1191 // index term (guarded above) so we don't need to index into arrays or
1192 // structures, just get the size of the scalar value.
1193 const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);
1194
1195 for (auto [B, O] : zip(BaseScevs, OffsetScevs)) {
1196 const SCEV *Base = get<0>(B);
1197 const SCEV *Offset = get<0>(O);
1198
1199 // Scale up the offsets by the size of the type, then add to the bases.
1200 const SCEV *Scaled =
1202 ScevList.emplace_back(SE->getAddExpr(Base, Scaled), NeedsFreeze);
1203 }
1204 break;
1205 }
1206 case Instruction::Select: {
1208 // A select means we've found a forked pointer, but we currently only
1209 // support a single select per pointer so if there's another behind this
1210 // then we just bail out and return the generic SCEV.
1211 findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
1212 findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
1213 if (ChildScevs.size() == 2)
1214 append_range(ScevList, ChildScevs);
1215 else
1216 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1217 break;
1218 }
1219 case Instruction::PHI: {
1221 // A phi means we've found a forked pointer, but we currently only
1222 // support a single phi per pointer so if there's another behind this
1223 // then we just bail out and return the generic SCEV.
1224 if (I->getNumOperands() == 2) {
1225 findForkedSCEVs(SE, L, I->getOperand(0), ChildScevs, Depth);
1226 findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
1227 }
1228 if (ChildScevs.size() == 2)
1229 append_range(ScevList, ChildScevs);
1230 else
1231 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1232 break;
1233 }
1234 case Instruction::Add:
1235 case Instruction::Sub: {
1238 findForkedSCEVs(SE, L, I->getOperand(0), LScevs, Depth);
1239 findForkedSCEVs(SE, L, I->getOperand(1), RScevs, Depth);
1240
1241 // See if we need to freeze our fork...
1242 bool NeedsFreeze =
1243 any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck);
1244
1245 // Check that we only have a single fork, on either the left or right side.
1246 // Copy the SCEV across for the one without a fork in order to generate
1247 // the full SCEV for both sides of the BinOp.
1248 if (LScevs.size() == 2 && RScevs.size() == 1)
1249 RScevs.push_back(RScevs[0]);
1250 else if (RScevs.size() == 2 && LScevs.size() == 1)
1251 LScevs.push_back(LScevs[0]);
1252 else {
1253 ScevList.emplace_back(Scev, NeedsFreeze);
1254 break;
1255 }
1256
1257 for (auto [L, R] : zip(LScevs, RScevs))
1258 ScevList.emplace_back(GetBinOpExpr(Opcode, get<0>(L), get<0>(R)),
1259 NeedsFreeze);
1260 break;
1261 }
1262 default:
1263 // Just return the current SCEV if we haven't handled the instruction yet.
1264 LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
1265 ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1266 break;
1267 }
1268}
1269
1270bool AccessAnalysis::createCheckForAccess(
1271 RuntimePointerChecking &RtCheck, MemAccessInfo Access, Type *AccessTy,
1272 const DenseMap<Value *, const SCEV *> &StridesMap,
1273 DenseMap<Value *, unsigned> &DepSetId, Loop *TheLoop,
1274 unsigned &RunningDepId, unsigned ASId, bool Assume) {
1275 Value *Ptr = Access.getPointer();
1276 ScalarEvolution *SE = PSE.getSE();
1277 assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
1278
1280 findForkedSCEVs(SE, TheLoop, Ptr, RTCheckPtrs, MaxForkedSCEVDepth);
1281 assert(!RTCheckPtrs.empty() &&
1282 "Must have some runtime-check pointer candidates");
1283
1284 // RTCheckPtrs must have size 2 if there are forked pointers. Otherwise, there
1285 // are no forked pointers; replaceSymbolicStridesSCEV in this case.
1286 auto IsLoopInvariantOrAR =
1287 [&SE, &TheLoop](const PointerIntPair<const SCEV *, 1, bool> &P) {
1288 return SE->isLoopInvariant(P.getPointer(), TheLoop) ||
1289 isa<SCEVAddRecExpr>(P.getPointer());
1290 };
1291 if (RTCheckPtrs.size() == 2 && all_of(RTCheckPtrs, IsLoopInvariantOrAR)) {
1292 LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n";
1293 for (const auto &[Idx, Q] : enumerate(RTCheckPtrs)) dbgs()
1294 << "\t(" << Idx << ") " << *Q.getPointer() << "\n");
1295 } else {
1296 RTCheckPtrs = {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
1297 }
1298
1299 /// Check whether all pointers can participate in a runtime bounds check. They
1300 /// must either be invariant or non-wrapping affine AddRecs.
1302 for (auto &P : RTCheckPtrs) {
1303 // The bounds for loop-invariant pointer is trivial.
1304 if (SE->isLoopInvariant(P.getPointer(), TheLoop))
1305 continue;
1306
1307 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(P.getPointer());
1308 if (!AR && Assume)
1309 AR = PSE.getAsAddRec(Ptr, &Predicates);
1310 if (!AR || !AR->isAffine())
1311 return false;
1312
1313 // If there's only one option for Ptr, commit the predicates collected by
1314 // getAsAddRec and look Ptr up again afterwards: the lookup below reads the
1315 // assumptions back from PSE, so they need to be committed first.
1316 if (RTCheckPtrs.size() == 1) {
1317 PSE.addPredicates(Predicates);
1318 Predicates.clear();
1319 if (auto *StrideAR = dyn_cast<SCEVAddRecExpr>(
1320 replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr)))
1321 AR = StrideAR;
1322 P.setPointer(AR);
1323 }
1324
1325 if (!isNoWrap(PSE, AR, RTCheckPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
1326 TheLoop, DT, /*Stride=*/std::nullopt,
1327 Assume ? &Predicates : nullptr))
1328 return false;
1329 }
1330 PSE.addPredicates(Predicates);
1331
1332 for (const auto &[PtrExpr, NeedsFreeze] : RTCheckPtrs) {
1333 // The id of the dependence set.
1334 unsigned DepId;
1335
1336 if (DepCands.contains(Access)) {
1337 Value *Leader = DepCands.getLeaderValue(Access).getPointer();
1338 unsigned &LeaderId = DepSetId[Leader];
1339 if (!LeaderId)
1340 LeaderId = RunningDepId++;
1341 DepId = LeaderId;
1342 } else
1343 // Each access has its own dependence set.
1344 DepId = RunningDepId++;
1345
1346 bool IsWrite = Access.getInt();
1347 RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
1348 NeedsFreeze);
1349 LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
1350 }
1351
1352 return true;
1353}
1354
1355bool AccessAnalysis::canCheckPtrAtRT(
1356 RuntimePointerChecking &RtCheck, Loop *TheLoop,
1357 const DenseMap<Value *, const SCEV *> &StridesMap, Value *&UncomputablePtr,
1358 bool AllowPartial, const MemoryDepChecker &DepChecker) {
1359 // Find pointers with computable bounds. We are going to use this information
1360 // to place a runtime bound check.
1361 bool CanDoRT = true;
1362
1363 bool MayNeedRTCheck = false;
1364 if (!IsRTCheckAnalysisNeeded) return true;
1365
1366 if (auto *Deps = DepChecker.getDependences()) {
1367 // If there are unknown dependences, this means runtime checks are needed to
1368 // ensure there's no overlap between accesses to the same underlying object.
1369 // Remove the equivalence classes containing both source and destination
1370 // accesses from DepCands. This ensures runtime checks will be generated
1371 // between those accesses and prevents them from being grouped together.
1372 for (const auto &Dep : *Deps) {
1373 if (Dep.Type != MemoryDepChecker::Dependence::Unknown) {
1376 "Should only skip safe dependences");
1377 continue;
1378 }
1379 Instruction *Src = Dep.getSource(DepChecker);
1380 Instruction *Dst = Dep.getDestination(DepChecker);
1381 DepCands.eraseClass({getPointerOperand(Src), Src->mayWriteToMemory()});
1382 DepCands.eraseClass({getPointerOperand(Dst), Dst->mayWriteToMemory()});
1383 }
1384 } else {
1385 CheckDeps.clear();
1386 DepCands = {};
1387 }
1388
1389 // We assign a consecutive id to access from different alias sets.
1390 // Accesses between different groups doesn't need to be checked.
1391 unsigned ASId = 0;
1392 for (const auto &AS : AST) {
1393 int NumReadPtrChecks = 0;
1394 int NumWritePtrChecks = 0;
1395 bool CanDoAliasSetRT = true;
1396 ++ASId;
1397 auto ASPointers = AS.getPointers();
1398
1399 // We assign consecutive id to access from different dependence sets.
1400 // Accesses within the same set don't need a runtime check.
1401 unsigned RunningDepId = 1;
1403
1405
1406 // First, count how many write and read accesses are in the alias set. Also
1407 // collect MemAccessInfos for later.
1409 for (const Value *ConstPtr : ASPointers) {
1410 Value *Ptr = const_cast<Value *>(ConstPtr);
1411 bool IsWrite = Accesses.contains(MemAccessInfo(Ptr, true));
1412 if (IsWrite)
1413 ++NumWritePtrChecks;
1414 else
1415 ++NumReadPtrChecks;
1416 AccessInfos.emplace_back(Ptr, IsWrite);
1417 }
1418
1419 // We do not need runtime checks for this alias set, if there are no writes
1420 // or a single write and no reads.
1421 if (NumWritePtrChecks == 0 ||
1422 (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
1423 assert((ASPointers.size() <= 1 ||
1424 all_of(ASPointers,
1425 [this](const Value *Ptr) {
1426 MemAccessInfo AccessWrite(const_cast<Value *>(Ptr),
1427 true);
1428 return !DepCands.contains(AccessWrite);
1429 })) &&
1430 "Can only skip updating CanDoRT below, if all entries in AS "
1431 "are reads or there is at most 1 entry");
1432 continue;
1433 }
1434
1435 for (auto &Access : AccessInfos) {
1436 for (const auto &AccessTy : Accesses[Access]) {
1437 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1438 DepSetId, TheLoop, RunningDepId, ASId,
1439 false)) {
1440 LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
1441 << *Access.getPointer() << '\n');
1442 Retries.emplace_back(Access, AccessTy);
1443 CanDoAliasSetRT = false;
1444 }
1445 }
1446 }
1447
1448 // Note that this function computes CanDoRT and MayNeedRTCheck
1449 // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
1450 // we have a pointer for which we couldn't find the bounds but we don't
1451 // actually need to emit any checks so it does not matter.
1452 //
1453 // We need runtime checks for this alias set, if there are at least 2
1454 // dependence sets (in which case RunningDepId > 2) or if we need to re-try
1455 // any bound checks (because in that case the number of dependence sets is
1456 // incomplete).
1457 bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();
1458
1459 // We need to perform run-time alias checks, but some pointers had bounds
1460 // that couldn't be checked.
1461 if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
1462 // Reset the CanDoSetRt flag and retry all accesses that have failed.
1463 // We know that we need these checks, so we can now be more aggressive
1464 // and add further checks if required (overflow checks).
1465 CanDoAliasSetRT = true;
1466 for (const auto &[Access, AccessTy] : Retries) {
1467 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1468 DepSetId, TheLoop, RunningDepId, ASId,
1469 /*Assume=*/true)) {
1470 CanDoAliasSetRT = false;
1471 UncomputablePtr = Access.getPointer();
1472 if (!AllowPartial)
1473 break;
1474 }
1475 }
1476 }
1477
1478 CanDoRT &= CanDoAliasSetRT;
1479 MayNeedRTCheck |= NeedsAliasSetRTCheck;
1480 ++ASId;
1481 }
1482
1483 // If the pointers that we would use for the bounds comparison have different
1484 // address spaces, assume the values aren't directly comparable, so we can't
1485 // use them for the runtime check. We also have to assume they could
1486 // overlap. In the future there should be metadata for whether address spaces
1487 // are disjoint.
1488 unsigned NumPointers = RtCheck.Pointers.size();
1489 for (unsigned i = 0; i < NumPointers; ++i) {
1490 for (unsigned j = i + 1; j < NumPointers; ++j) {
1491 // Only need to check pointers between two different dependency sets.
1492 if (RtCheck.Pointers[i].DependencySetId ==
1493 RtCheck.Pointers[j].DependencySetId)
1494 continue;
1495 // Only need to check pointers in the same alias set.
1496 if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
1497 continue;
1498
1499 Value *PtrI = RtCheck.Pointers[i].PointerValue;
1500 Value *PtrJ = RtCheck.Pointers[j].PointerValue;
1501
1502 unsigned ASi = PtrI->getType()->getPointerAddressSpace();
1503 unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
1504 if (ASi != ASj) {
1505 LLVM_DEBUG(
1506 dbgs() << "LAA: Runtime check would require comparison between"
1507 " different address spaces\n");
1508 return false;
1509 }
1510 }
1511 }
1512
1513 if (MayNeedRTCheck && (CanDoRT || AllowPartial))
1514 RtCheck.generateChecks(DepCands);
1515
1516 LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
1517 << " pointer comparisons.\n");
1518
1519 // If we can do run-time checks, but there are no checks, no runtime checks
1520 // are needed. This can happen when all pointers point to the same underlying
1521 // object for example.
1522 RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;
1523
1524 bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
1525 assert(CanDoRTIfNeeded == (CanDoRT || !MayNeedRTCheck) &&
1526 "CanDoRTIfNeeded depends on RtCheck.Need");
1527 if (!CanDoRTIfNeeded && !AllowPartial)
1528 RtCheck.reset();
1529 return CanDoRTIfNeeded;
1530}
1531
1532void AccessAnalysis::buildDependenceSets() {
1533 // We process the set twice: first we process read-write pointers, last we
1534 // process read-only pointers. This allows us to skip dependence tests for
1535 // read-only pointers.
1536
1537 LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
1538 LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
1539 LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
1540 LLVM_DEBUG({
1541 for (const auto &[A, _] : Accesses)
1542 dbgs() << "\t" << *A.getPointer() << " ("
1543 << (A.getInt()
1544 ? "write"
1545 : (ReadOnlyPtr.contains(A.getPointer()) ? "read-only"
1546 : "read"))
1547 << ")\n";
1548 });
1549
1550 // The AliasSetTracker has nicely partitioned our pointers by metadata
1551 // compatibility and potential for underlying-object overlap. As a result, we
1552 // only need to check for potential pointer dependencies within each alias
1553 // set.
1554 for (const auto &AS : AST) {
1555 bool AliasSetHasWrite = false;
1556
1557 // Map of (pointer to underlying objects, accessed address space) to last
1558 // access encountered.
1559 using UnderlyingObjToAccessMap =
1561 UnderlyingObjToAccessMap ObjToLastAccess;
1562
1563 // Set of access to check after all writes have been processed.
1564 PtrAccessMap DeferredAccesses;
1565
1566 // Iterate over each alias set twice, once to process read/write pointers,
1567 // and then to process read-only pointers.
1568
1569 auto ProcessAccesses = [&](bool UseDeferred) {
1570 PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
1571
1572 // Note that both the alias-set tracker and the alias sets themselves used
1573 // ordered collections internally and so the iteration order here is
1574 // deterministic.
1575 for (const Value *ConstPtr : AS.getPointers()) {
1576 Value *Ptr = const_cast<Value *>(ConstPtr);
1577
1578 // For a single memory access in AliasSetTracker, Accesses may contain
1579 // both read and write, and they both need to be handled for CheckDeps.
1580 for (auto [AccessPtr, IsWrite] : S.keys()) {
1581 if (AccessPtr != Ptr)
1582 continue;
1583
1584 // If we're using the deferred access set, then it contains only
1585 // reads.
1586 bool IsReadOnlyPtr = ReadOnlyPtr.contains(Ptr) && !IsWrite;
1587 if (UseDeferred && !IsReadOnlyPtr)
1588 continue;
1589 // Otherwise, the pointer must be in the PtrAccessSet, either as a
1590 // read or a write.
1591 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
1592 S.contains(MemAccessInfo(Ptr, false))) &&
1593 "Alias-set pointer not in the access set?");
1594
1595 MemAccessInfo Access(Ptr, IsWrite);
1596 DepCands.insert(Access);
1597
1598 // Memorize read-only pointers for later processing and skip them in
1599 // the first round (they need to be checked after we have seen all
1600 // write pointers). Note: we also mark pointer that are not
1601 // consecutive as "read-only" pointers (so that we check
1602 // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
1603 if (!UseDeferred && IsReadOnlyPtr) {
1604 // We only use the pointer keys, the types vector values don't
1605 // matter.
1606 DeferredAccesses.insert({Access, {}});
1607 continue;
1608 }
1609
1610 // If this is a write - check other reads and writes for conflicts. If
1611 // this is a read only check other writes for conflicts (but only if
1612 // there is no other write to the ptr - this is an optimization to
1613 // catch "a[i] = a[i] + " without having to do a dependence check).
1614 if ((IsWrite || IsReadOnlyPtr) && AliasSetHasWrite) {
1615 CheckDeps.push_back(Access);
1616 IsRTCheckAnalysisNeeded = true;
1617 }
1618
1619 if (IsWrite)
1620 AliasSetHasWrite = true;
1621
1622 // Create sets of pointers connected by a shared alias set and
1623 // underlying object.
1624 SmallVector<const Value *, 16> &UOs = UnderlyingObjects[Ptr];
1625 UOs = {};
1626 ::getUnderlyingObjects(Ptr, UOs, LI);
1628 << "Underlying objects for pointer " << *Ptr << "\n");
1629 for (const Value *UnderlyingObj : UOs) {
1630 // nullptr never alias, don't join sets for pointer that have "null"
1631 // in their UnderlyingObjects list.
1632 if (isa<ConstantPointerNull>(UnderlyingObj) &&
1634 TheLoop->getHeader()->getParent(),
1635 UnderlyingObj->getType()->getPointerAddressSpace()))
1636 continue;
1637
1638 auto [It, Inserted] = ObjToLastAccess.try_emplace(
1639 {UnderlyingObj,
1640 cast<PointerType>(Ptr->getType())->getAddressSpace()},
1641 Access);
1642 if (!Inserted) {
1643 DepCands.unionSets(Access, It->second);
1644 It->second = Access;
1645 }
1646
1647 LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
1648 }
1649 }
1650 }
1651 };
1652
1653 ProcessAccesses(false);
1654 ProcessAccesses(true);
1655 }
1656}
1657
1658/// Check whether the access through \p Ptr has a constant stride.
1659std::optional<int64_t> llvm::getPtrStride(
1660 PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp,
1661 const DominatorTree &DT, const DenseMap<Value *, const SCEV *> &StridesMap,
1662 bool ShouldCheckWrap, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
1663 const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1664 if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
1665 return 0;
1666
1667 assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
1668
1669 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1670 if (Predicates && !AR) {
1671 AR = PSE.getSE()->convertSCEVToAddRecWithPredicates(PtrScev, Lp,
1672 *Predicates);
1673 }
1674
1675 if (!AR) {
1676 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1677 << " SCEV: " << *PtrScev << "\n");
1678 return std::nullopt;
1679 }
1680
1681 std::optional<int64_t> Stride =
1682 getStrideFromAddRec(AR, Lp, AccessTy, Ptr, PSE);
1683 if (!ShouldCheckWrap || !Stride)
1684 return Stride;
1685
1686 if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, DT, Stride, Predicates))
1687 return Stride;
1688
1689 LLVM_DEBUG(
1690 dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1691 << *Ptr << " SCEV: " << *AR << "\n");
1692 return std::nullopt;
1693}
1694
1695/// Check whether the access through \p Ptr has a constant stride.
1696std::optional<int64_t>
1698 const Loop *Lp, const DominatorTree &DT,
1699 const DenseMap<Value *, const SCEV *> &StridesMap,
1700 bool Assume, bool ShouldCheckWrap) {
1702 std::optional<int64_t> Stride =
1703 getPtrStride(PSE, AccessTy, Ptr, Lp, DT, StridesMap, ShouldCheckWrap,
1704 Assume ? &Predicates : nullptr);
1705 PSE.addPredicates(Predicates);
1706 return Stride;
1707}
1708
1709std::optional<int64_t> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
1710 Type *ElemTyB, Value *PtrB,
1711 const DataLayout &DL,
1712 ScalarEvolution &SE,
1713 bool StrictCheck, bool CheckType) {
1714 assert(PtrA && PtrB && "Expected non-nullptr pointers.");
1715
1716 // Make sure that A and B are different pointers.
1717 if (PtrA == PtrB)
1718 return 0;
1719
1720 // Make sure that the element types are the same if required.
1721 if (CheckType && ElemTyA != ElemTyB)
1722 return std::nullopt;
1723
1724 unsigned ASA = PtrA->getType()->getPointerAddressSpace();
1725 unsigned ASB = PtrB->getType()->getPointerAddressSpace();
1726
1727 // Check that the address spaces match.
1728 if (ASA != ASB)
1729 return std::nullopt;
1730 unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1731
1732 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1733 const Value *PtrA1 = PtrA->stripAndAccumulateConstantOffsets(
1734 DL, OffsetA, /*AllowNonInbounds=*/true);
1735 const Value *PtrB1 = PtrB->stripAndAccumulateConstantOffsets(
1736 DL, OffsetB, /*AllowNonInbounds=*/true);
1737
1738 std::optional<int64_t> Val;
1739 if (PtrA1 == PtrB1) {
1740 // Retrieve the address space again as pointer stripping now tracks through
1741 // `addrspacecast`.
1742 ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
1743 ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
1744 // Check that the address spaces match and that the pointers are valid.
1745 if (ASA != ASB)
1746 return std::nullopt;
1747
1748 IdxWidth = DL.getIndexSizeInBits(ASA);
1749 OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1750 OffsetB = OffsetB.sextOrTrunc(IdxWidth);
1751
1752 OffsetB -= OffsetA;
1753 Val = OffsetB.trySExtValue();
1754 } else {
1755 // Otherwise compute the distance with SCEV between the base pointers.
1756 const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1757 const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1758 std::optional<APInt> Diff =
1759 SE.computeConstantDifference(PtrSCEVB, PtrSCEVA);
1760 if (!Diff)
1761 return std::nullopt;
1762 Val = Diff->trySExtValue();
1763 }
1764
1765 if (!Val)
1766 return std::nullopt;
1767
1768 int64_t Size = DL.getTypeStoreSize(ElemTyA);
1769 int64_t Dist = *Val / Size;
1770
1771 // Ensure that the calculated distance matches the type-based one after all
1772 // the bitcasts removal in the provided pointers.
1773 if (!StrictCheck || Dist * Size == Val)
1774 return Dist;
1775 return std::nullopt;
1776}
1777
1779 const DataLayout &DL, ScalarEvolution &SE,
1780 SmallVectorImpl<unsigned> &SortedIndices) {
1782 VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1783 "Expected list of pointer operands.");
1784 // Walk over the pointers, and map each of them to an offset relative to
1785 // first pointer in the array.
1786 Value *Ptr0 = VL[0];
1787
1788 using DistOrdPair = std::pair<int64_t, unsigned>;
1789 auto Compare = llvm::less_first();
1790 std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
1791 Offsets.emplace(0, 0);
1792 bool IsConsecutive = true;
1793 for (auto [Idx, Ptr] : drop_begin(enumerate(VL))) {
1794 std::optional<int64_t> Diff =
1795 getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
1796 /*StrictCheck=*/true);
1797 if (!Diff)
1798 return false;
1799
1800 // Check if the pointer with the same offset is found.
1801 int64_t Offset = *Diff;
1802 auto [It, IsInserted] = Offsets.emplace(Offset, Idx);
1803 if (!IsInserted)
1804 return false;
1805 // Consecutive order if the inserted element is the last one.
1806 IsConsecutive &= std::next(It) == Offsets.end();
1807 }
1808 SortedIndices.clear();
1809 if (!IsConsecutive) {
1810 // Fill SortedIndices array only if it is non-consecutive.
1811 SortedIndices.resize(VL.size());
1812 for (auto [Idx, Off] : enumerate(Offsets))
1813 SortedIndices[Idx] = Off.second;
1814 }
1815 return true;
1816}
1817
1818/// Returns true if the memory operations \p A and \p B are consecutive.
1820 ScalarEvolution &SE, bool CheckType) {
1823 if (!PtrA || !PtrB)
1824 return false;
1825 Type *ElemTyA = getLoadStoreType(A);
1826 Type *ElemTyB = getLoadStoreType(B);
1827 std::optional<int64_t> Diff =
1828 getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
1829 /*StrictCheck=*/true, CheckType);
1830 return Diff == 1;
1831}
1832
1834 visitPointers(SI->getPointerOperand(), *InnermostLoop,
1835 [this, SI](Value *Ptr) {
1836 Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
1837 InstMap.push_back(SI);
1838 ++AccessIdx;
1839 });
1840}
1841
1843 visitPointers(LI->getPointerOperand(), *InnermostLoop,
1844 [this, LI](Value *Ptr) {
1845 Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
1846 InstMap.push_back(LI);
1847 ++AccessIdx;
1848 });
1849}
1850
1870
1872 switch (Type) {
1873 case NoDep:
1874 case Forward:
1876 case Unknown:
1877 case IndirectUnsafe:
1878 case InvariantUnsafe:
1879 return false;
1880
1882 case Backward:
1884 return true;
1885 }
1886 llvm_unreachable("unexpected DepType!");
1887}
1888
1893
1895 switch (Type) {
1896 case Forward:
1898 return true;
1899
1900 case NoDep:
1901 case Unknown:
1903 case Backward:
1905 case IndirectUnsafe:
1906 case InvariantUnsafe:
1907 return false;
1908 }
1909 llvm_unreachable("unexpected DepType!");
1910}
1911
1912bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1913 uint64_t TypeByteSize,
1914 unsigned CommonStride) {
1915 // If loads occur at a distance that is not a multiple of a feasible vector
1916 // factor store-load forwarding does not take place.
1917 // Positive dependences might cause troubles because vectorizing them might
1918 // prevent store-load forwarding making vectorized code run a lot slower.
1919 // a[i] = a[i-3] ^ a[i-8];
1920 // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
1921 // hence on your typical architecture store-load forwarding does not take
1922 // place. Vectorizing in such cases does not make sense.
1923 // Store-load forwarding distance.
1924
1925 // After this many iterations store-to-load forwarding conflicts should not
1926 // cause any slowdowns.
1927 const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1928 // Maximum vector factor.
1929 uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
1930 std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
1931 MaxStoreLoadForwardSafeDistanceInBits);
1932
1933 // Compute the smallest VF at which the store and load would be misaligned.
1934 for (uint64_t VF = 2 * TypeByteSize;
1935 VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {
1936 // If the number of vector iteration between the store and the load are
1937 // small we could incur conflicts.
1938 if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1939 MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);
1940 break;
1941 }
1942 }
1943
1944 if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
1945 LLVM_DEBUG(
1946 dbgs() << "LAA: Distance " << Distance
1947 << " that could cause a store-load forwarding conflict\n");
1948 return true;
1949 }
1950
1951 if (CommonStride &&
1952 MaxVFWithoutSLForwardIssuesPowerOf2 <
1953 MaxStoreLoadForwardSafeDistanceInBits &&
1954 MaxVFWithoutSLForwardIssuesPowerOf2 !=
1955 VectorizerParams::MaxVectorWidth * TypeByteSize) {
1956 uint64_t MaxVF =
1957 bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride);
1958 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1959 MaxStoreLoadForwardSafeDistanceInBits =
1960 std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
1961 }
1962 return false;
1963}
1964
1965void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1966 if (Status < S)
1967 Status = S;
1968}
1969
1970/// Given a dependence-distance \p Dist between two memory accesses, that have
1971/// strides in the same direction whose absolute value of the maximum stride is
1972/// given in \p MaxStride, in a loop whose maximum backedge taken count is \p
1973/// MaxBTC, check if it is possible to prove statically that the dependence
1974/// distance is larger than the range that the accesses will travel through the
1975/// execution of the loop. If so, return true; false otherwise. This is useful
1976/// for example in loops such as the following (PR31098):
1977///
1978/// for (i = 0; i < D; ++i) {
1979/// = out[i];
1980/// out[i+D] =
1981/// }
1983 const SCEV &MaxBTC, const SCEV &Dist,
1984 uint64_t MaxStride) {
1985
1986 // If we can prove that
1987 // (**) |Dist| > MaxBTC * Step
1988 // where Step is the absolute stride of the memory accesses in bytes,
1989 // then there is no dependence.
1990 //
1991 // Rationale:
1992 // We basically want to check if the absolute distance (|Dist/Step|)
1993 // is >= the loop iteration count (or > MaxBTC).
1994 // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1995 // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1996 // that the dependence distance is >= VF; This is checked elsewhere.
1997 // But in some cases we can prune dependence distances early, and
1998 // even before selecting the VF, and without a runtime test, by comparing
1999 // the distance against the loop iteration count. Since the vectorized code
2000 // will be executed only if LoopCount >= VF, proving distance >= LoopCount
2001 // also guarantees that distance >= VF.
2002 //
2003 const SCEV *Step = SE.getConstant(MaxBTC.getType(), MaxStride);
2004 const SCEV *Product = SE.getMulExpr(&MaxBTC, Step);
2005
2006 const SCEV *CastedDist = &Dist;
2007 const SCEV *CastedProduct = Product;
2008 uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
2009 uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
2010
2011 // The dependence distance can be positive/negative, so we sign extend Dist;
2012 // The multiplication of the absolute stride in bytes and the
2013 // backedgeTakenCount is non-negative, so we zero extend Product.
2014 if (DistTypeSizeBits > ProductTypeSizeBits)
2015 CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
2016 else
2017 CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
2018
2019 // Is Dist - (MaxBTC * Step) > 0 ?
2020 // (If so, then we have proven (**) because |Dist| >= Dist)
2021 const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
2022 if (SE.isKnownPositive(Minus))
2023 return true;
2024
2025 // Second try: Is -Dist - (MaxBTC * Step) > 0 ?
2026 // (If so, then we have proven (**) because |Dist| >= -1*Dist)
2027 const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
2028 Minus = SE.getMinusSCEV(NegDist, CastedProduct);
2029 return SE.isKnownPositive(Minus);
2030}
2031
2032/// Check the dependence for two accesses with the same stride \p Stride.
2033/// \p Distance is the positive distance in bytes, and \p TypeByteSize is type
2034/// size in bytes.
2035///
2036/// \returns true if they are independent.
2038 uint64_t TypeByteSize) {
2039 assert(Stride > 1 && "The stride must be greater than 1");
2040 assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
2041 assert(Distance > 0 && "The distance must be non-zero");
2042
2043 // Skip if the distance is not multiple of type byte size.
2044 if (Distance % TypeByteSize)
2045 return false;
2046
2047 // No dependence if the distance is not multiple of the stride.
2048 // E.g.
2049 // for (i = 0; i < 1024 ; i += 4)
2050 // A[i+2] = A[i] + 1;
2051 //
2052 // Two accesses in memory (distance is 2, stride is 4):
2053 // | A[0] | | | | A[4] | | | |
2054 // | | | A[2] | | | | A[6] | |
2055 //
2056 // E.g.
2057 // for (i = 0; i < 1024 ; i += 3)
2058 // A[i+4] = A[i] + 1;
2059 //
2060 // Two accesses in memory (distance is 4, stride is 3):
2061 // | A[0] | | | A[3] | | | A[6] | | |
2062 // | | | | | A[4] | | | A[7] | |
2063 return Distance % Stride;
2064}
2065
2066bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
2067 Type *SrcTy,
2068 const SCEV *Sink,
2069 Type *SinkTy) {
2070 const SCEV *BTC = PSE.getBackedgeTakenCount();
2071 const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
2072 ScalarEvolution &SE = *PSE.getSE();
2073 const auto &[SrcStart_, SrcEnd_] =
2074 getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
2075 &SE, &PointerBounds, DT, AC, LoopGuards);
2076 if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
2077 return false;
2078
2079 const auto &[SinkStart_, SinkEnd_] =
2080 getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
2081 &SE, &PointerBounds, DT, AC, LoopGuards);
2082 if (isa<SCEVCouldNotCompute>(SinkStart_) ||
2083 isa<SCEVCouldNotCompute>(SinkEnd_))
2084 return false;
2085
2086 if (!LoopGuards)
2087 LoopGuards.emplace(ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
2088
2089 auto SrcEnd = SE.applyLoopGuards(SrcEnd_, *LoopGuards);
2090 auto SinkStart = SE.applyLoopGuards(SinkStart_, *LoopGuards);
2091 if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart))
2092 return true;
2093
2094 auto SinkEnd = SE.applyLoopGuards(SinkEnd_, *LoopGuards);
2095 auto SrcStart = SE.applyLoopGuards(SrcStart_, *LoopGuards);
2096 return SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart);
2097}
2098
2100 MemoryDepChecker::DepDistanceStrideAndSizeInfo>
2101MemoryDepChecker::getDependenceDistanceStrideAndSize(
2102 const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
2103 const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
2104 const auto &DL = InnermostLoop->getHeader()->getDataLayout();
2105 auto &SE = *PSE.getSE();
2106 const auto &[APtr, AIsWrite] = A;
2107 const auto &[BPtr, BIsWrite] = B;
2108
2109 // Two reads are independent.
2110 if (!AIsWrite && !BIsWrite)
2112
2113 Type *ATy = getLoadStoreType(AInst);
2114 Type *BTy = getLoadStoreType(BInst);
2115
2116 // We cannot check pointers in different address spaces.
2117 if (APtr->getType()->getPointerAddressSpace() !=
2118 BPtr->getType()->getPointerAddressSpace())
2120
2122 std::optional<int64_t> StrideAPtr =
2123 getPtrStride(PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides,
2124 /*ShouldCheckWrap=*/true, &Predicates);
2125 std::optional<int64_t> StrideBPtr =
2126 getPtrStride(PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides,
2127 /*ShouldCheckWrap=*/true, &Predicates);
2128 PSE.addPredicates(Predicates);
2129
2130 const SCEV *Src = PSE.getSCEV(APtr);
2131 const SCEV *Sink = PSE.getSCEV(BPtr);
2132
2133 // If the induction step is negative we have to invert source and sink of the
2134 // dependence when measuring the distance between them. We should not swap
2135 // AIsWrite with BIsWrite, as their uses expect them in program order.
2136 if (StrideAPtr && *StrideAPtr < 0) {
2137 std::swap(Src, Sink);
2138 std::swap(AInst, BInst);
2139 std::swap(ATy, BTy);
2140 std::swap(StrideAPtr, StrideBPtr);
2141 }
2142
2143 const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
2144
2145 LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
2146 << "\n");
2147 LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
2148 << ": " << *Dist << "\n");
2149
2150 // Need accesses with constant strides and the same direction for further
2151 // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
2152 // similar code or pointer arithmetic that could wrap in the address space.
2153
2154 // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
2155 // not loop-invariant (stride will be 0 in that case), we cannot analyze the
2156 // dependence further and also cannot generate runtime checks.
2157 if (!StrideAPtr || !StrideBPtr) {
2158 LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
2160 }
2161
2162 int64_t StrideAPtrInt = *StrideAPtr;
2163 int64_t StrideBPtrInt = *StrideBPtr;
2164 LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
2165 << " Sink induction step: " << StrideBPtrInt << "\n");
2166 // At least Src or Sink are loop invariant and the other is strided or
2167 // invariant.
2168 if (!StrideAPtrInt || !StrideBPtrInt) {
2169 // If both are loop-invariant and access the same location, we cannot
2170 // vectorize.
2171 if (!StrideAPtrInt && !StrideBPtrInt && Dist->isZero())
2173 // Otherwise, we can generate a runtime check to disambiguate the accesses.
2175 }
2176
2177 // Both Src and Sink have a constant stride, check if they are in the same
2178 // direction.
2179 if ((StrideAPtrInt > 0) != (StrideBPtrInt > 0)) {
2180 LLVM_DEBUG(
2181 dbgs() << "Pointer access with strides in different directions\n");
2183 }
2184
2185 TypeSize AStoreSz = DL.getTypeStoreSize(ATy);
2186 TypeSize BStoreSz = DL.getTypeStoreSize(BTy);
2187
2188 // If store sizes are not the same, set TypeByteSize to zero, so we can check
2189 // it in the caller isDependent.
2190 uint64_t ASz = DL.getTypeAllocSize(ATy);
2191 uint64_t BSz = DL.getTypeAllocSize(BTy);
2192 uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0;
2193
2194 uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz;
2195 uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz;
2196
2197 uint64_t MaxStride = std::max(StrideAScaled, StrideBScaled);
2198
2199 std::optional<uint64_t> CommonStride;
2200 if (StrideAScaled == StrideBScaled)
2201 CommonStride = StrideAScaled;
2202
2203 // TODO: Historically, we didn't retry with runtime checks when (unscaled)
2204 // strides were different but there is no inherent reason to.
2205 if (!isa<SCEVConstant>(Dist))
2206 ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;
2207
2208 // If distance is a SCEVCouldNotCompute, return Unknown immediately.
2209 if (isa<SCEVCouldNotCompute>(Dist)) {
2210 LLVM_DEBUG(dbgs() << "LAA: Uncomputable distance.\n");
2211 return Dependence::Unknown;
2212 }
2213
2214 return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
2215 TypeByteSize, AIsWrite, BIsWrite);
2216}
2217
2219MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2220 const MemAccessInfo &B, unsigned BIdx) {
2221 assert(AIdx < BIdx && "Must pass arguments in program order");
2222
2223 // Check if we can prove that Sink only accesses memory after Src's end or
2224 // vice versa. The helper is used to perform the checks only on the exit paths
2225 // where it helps to improve the analysis result.
2226 auto CheckCompletelyBeforeOrAfter = [&]() {
2227 auto *APtr = A.getPointer();
2228 auto *BPtr = B.getPointer();
2229 Type *ATy = getLoadStoreType(InstMap[AIdx]);
2230 Type *BTy = getLoadStoreType(InstMap[BIdx]);
2231 const SCEV *Src = PSE.getSCEV(APtr);
2232 const SCEV *Sink = PSE.getSCEV(BPtr);
2233 return areAccessesCompletelyBeforeOrAfter(Src, ATy, Sink, BTy);
2234 };
2235
2236 // Get the dependence distance, stride, type size and what access writes for
2237 // the dependence between A and B.
2238 auto Res =
2239 getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
2240 if (std::holds_alternative<Dependence::DepType>(Res)) {
2241 if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
2242 CheckCompletelyBeforeOrAfter())
2243 return Dependence::NoDep;
2244 return std::get<Dependence::DepType>(Res);
2245 }
2246
2247 auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
2248 std::get<DepDistanceStrideAndSizeInfo>(Res);
2249 bool HasSameSize = TypeByteSize > 0;
2250
2251 ScalarEvolution &SE = *PSE.getSE();
2252 auto &DL = InnermostLoop->getHeader()->getDataLayout();
2253
2254 // If the distance between the acecsses is larger than their maximum absolute
2255 // stride multiplied by the symbolic maximum backedge taken count (which is an
2256 // upper bound of the number of iterations), the accesses are independet, i.e.
2257 // they are far enough appart that accesses won't access the same location
2258 // across all loop ierations.
2259 if (HasSameSize &&
2261 DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride))
2262 return Dependence::NoDep;
2263
2264 // The rest of this function relies on ConstDist being at most 64-bits, which
2265 // is checked earlier. Will assert if the calling code changes.
2266 const APInt *APDist = nullptr;
2267 uint64_t ConstDist =
2268 match(Dist, m_scev_APInt(APDist)) ? APDist->abs().getZExtValue() : 0;
2269
2270 // Attempt to prove strided accesses independent.
2271 if (APDist) {
2272 // If the distance between accesses and their strides are known constants,
2273 // check whether the accesses interlace each other.
2274 if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
2275 areStridedAccessesIndependent(ConstDist, *CommonStride, TypeByteSize)) {
2276 LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
2277 return Dependence::NoDep;
2278 }
2279 } else {
2280 if (!LoopGuards)
2281 LoopGuards.emplace(
2282 ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
2283 Dist = SE.applyLoopGuards(Dist, *LoopGuards);
2284 }
2285
2286 // Negative distances are not plausible dependencies.
2287 if (SE.isKnownNonPositive(Dist)) {
2288 if (SE.isKnownNonNegative(Dist)) {
2289 if (HasSameSize) {
2290 // Write to the same location with the same size.
2291 return Dependence::Forward;
2292 }
2293 LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
2294 "different type sizes\n");
2295 return Dependence::Unknown;
2296 }
2297
2298 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
2299 // Check if the first access writes to a location that is read in a later
2300 // iteration, where the distance between them is not a multiple of a vector
2301 // factor and relatively small.
2302 //
2303 // NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
2304 // couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
2305 // forward dependency will allow vectorization using any width.
2306
2307 if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2308 if (!ConstDist) {
2309 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2311 }
2312 if (!HasSameSize ||
2313 couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
2314 LLVM_DEBUG(
2315 dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
2317 }
2318 }
2319
2320 LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
2321 return Dependence::Forward;
2322 }
2323
2324 int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
2325 // Below we only handle strictly positive distances.
2326 if (MinDistance <= 0) {
2327 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2329 }
2330
2331 if (!HasSameSize) {
2332 if (CheckCompletelyBeforeOrAfter())
2333 return Dependence::NoDep;
2334 LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
2335 "different type sizes\n");
2336 return Dependence::Unknown;
2337 }
2338 // Bail out early if passed-in parameters make vectorization not feasible.
2339 unsigned MinForcedFactor =
2340 std::max(1U, VectorizerParams::VectorizationFactor.getKnownMinValue());
2341 unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
2343 // The minimum number of iterations for a vectorized/unrolled version.
2344 unsigned MinNumIter = std::max(MinForcedFactor * ForcedUnroll, 2U);
2345
2346 // It's not vectorizable if the distance is smaller than the minimum distance
2347 // needed for a vectroized/unrolled version. Vectorizing one iteration in
2348 // front needs MaxStride. Vectorizing the last iteration needs TypeByteSize.
2349 // (No need to plus the last gap distance).
2350 //
2351 // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
2352 // foo(int *A) {
2353 // int *B = (int *)((char *)A + 14);
2354 // for (i = 0 ; i < 1024 ; i += 2)
2355 // B[i] = A[i] + 1;
2356 // }
2357 //
2358 // Two accesses in memory (stride is 4 * 2):
2359 // | A[0] | | A[2] | | A[4] | | A[6] | |
2360 // | B[0] | | B[2] | | B[4] |
2361 //
2362 // MinDistance needs for vectorizing iterations except the last iteration:
2363 // 4 * 2 * (MinNumIter - 1). MinDistance needs for the last iteration: 4.
2364 // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
2365 //
2366 // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
2367 // 12, which is less than distance.
2368 //
2369 // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
2370 // the minimum distance needed is 28, which is greater than distance. It is
2371 // not safe to do vectorization.
2372 //
2373 // We use MaxStride (maximum of src and sink strides) to get a conservative
2374 // lower bound on the MinDistanceNeeded in case of different strides.
2375
2376 // We know that Dist is positive, but it may not be constant. Use the signed
2377 // minimum for computations below, as this ensures we compute the closest
2378 // possible dependence distance.
2379 uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize;
2380 if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
2381 if (!ConstDist) {
2382 // For non-constant distances, we checked the lower bound of the
2383 // dependence distance and the distance may be larger at runtime (and safe
2384 // for vectorization). Classify it as Unknown, so we re-try with runtime
2385 // checks, unless we can prove both accesses cannot overlap.
2386 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2388 }
2389 LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
2390 << MinDistance << '\n');
2391 return Dependence::Backward;
2392 }
2393
2394 // Unsafe if the minimum distance needed is greater than smallest dependence
2395 // distance distance.
2396 if (MinDistanceNeeded > MinDepDistBytes) {
2397 LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
2398 << MinDistanceNeeded << " size in bytes\n");
2399 return Dependence::Backward;
2400 }
2401
2402 MinDepDistBytes =
2403 std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);
2404
2405 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
2406 if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
2407 couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
2409
2410 uint64_t MaxVF = MinDepDistBytes / MaxStride;
2411 LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
2412 << " with max VF = " << MaxVF << '\n');
2413
2414 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
2415 if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
2416 // For non-constant distances, we checked the lower bound of the dependence
2417 // distance and the distance may be larger at runtime (and safe for
2418 // vectorization). Classify it as Unknown, so we re-try with runtime checks,
2419 // unless we can prove both accesses cannot overlap.
2420 return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2422 }
2423
2424 if (CheckCompletelyBeforeOrAfter())
2425 return Dependence::NoDep;
2426
2427 MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
2429}
2430
2432 ArrayRef<MemAccessInfo> CheckDeps) {
2433
2434 MinDepDistBytes = -1;
2436 for (MemAccessInfo CurAccess : CheckDeps) {
2437 if (Visited.contains(CurAccess))
2438 continue;
2439
2440 // Check accesses within this set.
2442 DepCands.findLeader(CurAccess);
2444 DepCands.member_end();
2445
2446 // Check every access pair.
2447 while (AI != AE) {
2448 Visited.insert(*AI);
2449 bool AIIsWrite = AI->getInt();
2450 // Reads from the same pointer don't create extra hazards, but multiple
2451 // stores do (WAW), so start from AI for writes and next(AI) for reads.
2453 (AIIsWrite ? AI : std::next(AI));
2454 while (OI != AE) {
2455 // Check every accessing instruction pair in program order.
2456 auto &Acc = Accesses[*AI];
2457 for (std::vector<unsigned>::iterator I1 = Acc.begin(), I1E = Acc.end();
2458 I1 != I1E; ++I1)
2459 // When checking for WAW (OI == AI) caused by multiple writes to the
2460 // same pointer, start I2 at the next access past I1 to avoid
2461 // self-comparison.
2462 for (std::vector<unsigned>::iterator
2463 I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
2464 I2E = (OI == AI ? I1E : Accesses[*OI].end());
2465 I2 != I2E; ++I2) {
2466 auto A = std::make_pair(&*AI, *I1);
2467 auto B = std::make_pair(&*OI, *I2);
2468
2469 assert(*I1 != *I2);
2470 if (*I1 > *I2)
2471 std::swap(A, B);
2472
2474 isDependent(*A.first, A.second, *B.first, B.second);
2476
2477 // Gather dependences unless we accumulated MaxDependences
2478 // dependences. In that case return as soon as we find the first
2479 // unsafe dependence. This puts a limit on this quadratic
2480 // algorithm.
2481 if (RecordDependences) {
2482 if (Type != Dependence::NoDep)
2483 Dependences.emplace_back(A.second, B.second, Type);
2484
2485 if (Dependences.size() >= MaxDependences) {
2486 RecordDependences = false;
2487 Dependences.clear();
2489 << "Too many dependences, stopped recording\n");
2490 }
2491 }
2492 if (!RecordDependences && !isSafeForVectorization())
2493 return false;
2494 }
2495 ++OI;
2496 }
2497 ++AI;
2498 }
2499 }
2500
2501 LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
2502 return isSafeForVectorization();
2503}
2504
2507 MemAccessInfo Access(Ptr, IsWrite);
2508 auto I = Accesses.find(Access);
2510 if (I != Accesses.end()) {
2511 transform(I->second, std::back_inserter(Insts),
2512 [&](unsigned Idx) { return this->InstMap[Idx]; });
2513 }
2514
2515 return Insts;
2516}
2517
2519 "NoDep",
2520 "Unknown",
2521 "IndirectUnsafe",
2522 "InvariantUnsafe",
2523 "Forward",
2524 "ForwardButPreventsForwarding",
2525 "Backward",
2526 "BackwardVectorizable",
2527 "BackwardVectorizableButPreventsForwarding"};
2528
2530 raw_ostream &OS, unsigned Depth,
2531 const SmallVectorImpl<Instruction *> &Instrs) const {
2532 OS.indent(Depth) << DepName[Type] << ":\n";
2533 OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
2534 OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
2535}
2536
2537bool LoopAccessInfo::canAnalyzeLoop() {
2538 // We need to have a loop header.
2539 LLVM_DEBUG(dbgs() << "\nLAA: Checking a loop in '"
2540 << TheLoop->getHeader()->getParent()->getName() << "' from "
2541 << TheLoop->getLocStr() << "\n");
2542
2543 // We can only analyze innermost loops.
2544 if (!TheLoop->isInnermost()) {
2545 LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
2546 recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
2547 return false;
2548 }
2549
2550 // We must have a single backedge.
2551 if (TheLoop->getNumBackEdges() != 1) {
2552 LLVM_DEBUG(
2553 dbgs() << "LAA: loop control flow is not understood by analyzer\n");
2554 recordAnalysis("CFGNotUnderstood")
2555 << "loop control flow is not understood by analyzer";
2556 return false;
2557 }
2558
2559 // ScalarEvolution needs to be able to find the symbolic max backedge taken
2560 // count, which is an upper bound on the number of loop iterations. The loop
2561 // may execute fewer iterations, if it exits via an uncountable exit.
2562 const SCEV *ExitCount = PSE->getSymbolicMaxBackedgeTakenCount();
2563 if (isa<SCEVCouldNotCompute>(ExitCount)) {
2564 recordAnalysis("CantComputeNumberOfIterations")
2565 << "could not determine number of loop iterations";
2566 LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
2567 return false;
2568 }
2569
2570 LLVM_DEBUG(dbgs() << "LAA: Found an analyzable loop: "
2571 << TheLoop->getHeader()->getName() << "\n");
2572 return true;
2573}
2574
2575bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
2576 const TargetLibraryInfo *TLI,
2577 DominatorTree *DT) {
2578 // Holds the Load and Store instructions.
2581 SmallPtrSet<MDNode *, 8> LoopAliasScopes;
2582
2583 // Holds all the different accesses in the loop.
2584 unsigned NumReads = 0;
2585 unsigned NumReadWrites = 0;
2586
2587 bool HasComplexMemInst = false;
2588
2589 // A runtime check is only legal to insert if there are no convergent calls.
2590 HasConvergentOp = false;
2591
2592 PtrRtChecking->Pointers.clear();
2593 PtrRtChecking->Need = false;
2594
2595 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
2596
2597 const bool EnableMemAccessVersioningOfLoop =
2599 !TheLoop->getHeader()->getParent()->hasOptSize();
2600
2601 // Traverse blocks in fixed RPOT order, regardless of their storage in the
2602 // loop info, as it may be arbitrary.
2603 LoopBlocksRPO RPOT(TheLoop);
2604 RPOT.perform(LI);
2605
2606 // Don't return early as soon as we found a memory access that cannot be
2607 // vectorize - HasConvergentOp must still be computed as it is part of LAI's
2608 // public API (used by LoopDistribute).
2609 for (BasicBlock *BB : RPOT) {
2610 // Scan the BB and collect legal loads and stores. Also detect any
2611 // convergent instructions.
2612 for (Instruction &I : *BB) {
2613 if (auto *Call = dyn_cast<CallBase>(&I)) {
2614 if (Call->isConvergent())
2615 HasConvergentOp = true;
2616 }
2617
2618 // Unsafe to vectorize and we already found a convergent operation, can
2619 // early return now.
2620 if (HasComplexMemInst && HasConvergentOp)
2621 return false;
2622
2623 // Already unsafe to vectorize; keep scanning for convergent ops.
2624 if (HasComplexMemInst)
2625 continue;
2626
2627 // Record alias scopes defined inside the loop.
2628 if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
2629 for (Metadata *Op : Decl->getScopeList()->operands())
2630 LoopAliasScopes.insert(cast<MDNode>(Op));
2631
2632 // Many math library functions read the rounding mode. We will only
2633 // vectorize a loop if it contains known function calls that don't set
2634 // the flag. Therefore, it is safe to ignore this read from memory.
2635 auto *Call = dyn_cast<CallInst>(&I);
2637 continue;
2638
2639 // If this is a load, save it. If this instruction can read from memory
2640 // but is not a load, we only allow it if it's a call to a function with a
2641 // vector mapping and no pointer arguments.
2642 if (I.mayReadFromMemory()) {
2643 auto hasPointerArgs = [](CallBase *CB) {
2644 return any_of(CB->args(), [](Value const *Arg) {
2645 return Arg->getType()->isPointerTy();
2646 });
2647 };
2648
2649 // If the function has an explicit vectorized counterpart, and does not
2650 // take output/input pointers, we can safely assume that it can be
2651 // vectorized.
2652 if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
2653 !hasPointerArgs(Call) && !VFDatabase::getMappings(*Call).empty())
2654 continue;
2655
2656 auto *Ld = dyn_cast<LoadInst>(&I);
2657 if (!Ld) {
2658 recordAnalysis("CantVectorizeInstruction", &I)
2659 << "instruction cannot be vectorized";
2660 HasComplexMemInst = true;
2661 continue;
2662 }
2663 if (!Ld->isSimple() && !IsAnnotatedParallel) {
2664 recordAnalysis("NonSimpleLoad", Ld)
2665 << "read with atomic ordering or volatile read";
2666 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
2667 HasComplexMemInst = true;
2668 continue;
2669 }
2670 NumLoads++;
2671 Loads.push_back(Ld);
2672 DepChecker->addAccess(Ld);
2673 if (EnableMemAccessVersioningOfLoop)
2674 collectStridedAccess(Ld);
2675 continue;
2676 }
2677
2678 // Save 'store' instructions. Abort if other instructions write to memory.
2679 if (I.mayWriteToMemory()) {
2680 auto *St = dyn_cast<StoreInst>(&I);
2681 if (!St) {
2682 recordAnalysis("CantVectorizeInstruction", &I)
2683 << "instruction cannot be vectorized";
2684 HasComplexMemInst = true;
2685 continue;
2686 }
2687 if (!St->isSimple() && !IsAnnotatedParallel) {
2688 recordAnalysis("NonSimpleStore", St)
2689 << "write with atomic ordering or volatile write";
2690 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
2691 HasComplexMemInst = true;
2692 continue;
2693 }
2694 NumStores++;
2695 Stores.push_back(St);
2696 DepChecker->addAccess(St);
2697 if (EnableMemAccessVersioningOfLoop)
2698 collectStridedAccess(St);
2699 }
2700 } // Next instr.
2701 } // Next block.
2702
2703 if (HasComplexMemInst)
2704 return false;
2705
2706 // Now we have two lists that hold the loads and the stores.
2707 // Next, we find the pointers that they use.
2708
2709 // Check if we see any stores. If there are no stores, then we don't
2710 // care if the pointers are *restrict*.
2711 if (!Stores.size()) {
2712 LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
2713 return true;
2714 }
2715
2717 AccessAnalysis Accesses(TheLoop, AA, LI, *DT, DepCands, *PSE,
2718 LoopAliasScopes);
2719
2720 // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
2721 // multiple times on the same object. If the ptr is accessed twice, once
2722 // for read and once for write, it will only appear once (on the write
2723 // list). This is okay, since we are going to check for conflicts between
2724 // writes and between reads and writes, but not between reads and reads.
2725 SmallSet<std::pair<Value *, Type *>, 16> Seen;
2726
2727 // Record uniform store addresses to identify if we have multiple stores
2728 // to the same address.
2729 SmallPtrSet<Value *, 16> UniformStores;
2730
2731 for (StoreInst *ST : Stores) {
2732 Value *Ptr = ST->getPointerOperand();
2733
2734 if (isInvariant(Ptr)) {
2735 // Record store instructions to loop invariant addresses
2736 StoresToInvariantAddresses.push_back(ST);
2737 HasStoreStoreDependenceInvolvingLoopInvariantAddress |=
2738 !UniformStores.insert(Ptr).second;
2739 }
2740
2741 // If we did *not* see this pointer before, insert it to the read-write
2742 // list. At this phase it is only a 'write' list.
2743 Type *AccessTy = getLoadStoreType(ST);
2744 if (Seen.insert({Ptr, AccessTy}).second) {
2745 ++NumReadWrites;
2746
2747 MemoryLocation Loc = MemoryLocation::get(ST);
2748 // The TBAA metadata could have a control dependency on the predication
2749 // condition, so we cannot rely on it when determining whether or not we
2750 // need runtime pointer checks.
2751 if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
2752 Loc.AATags.TBAA = nullptr;
2753
2754 // Expand forked pointers (i.e., a phi of multiple strided pointers) into
2755 // all alternatives.
2756 visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2757 [&Accesses, AccessTy, Loc](Value *Ptr) {
2758 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2759 Accesses.addStore(NewLoc, AccessTy);
2760 });
2761 }
2762 }
2763
2764 if (IsAnnotatedParallel) {
2765 LLVM_DEBUG(
2766 dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
2767 << "checks.\n");
2768 return true;
2769 }
2770
2771 for (LoadInst *LD : Loads) {
2772 Value *Ptr = LD->getPointerOperand();
2773 // If we did *not* see this pointer before, insert it to the read list. If
2774 // we *did* see it before, then it is already in the read-write list. This
2775 // allows us to vectorize expressions such as A[i] += x; Because the address
2776 // of A[i] is a read-write pointer. This only works if the index of A[i] is
2777 // strictly monotonic, which we approximate (conservatively) via
2778 // getPtrStride. If the address is unknown (e.g. A[B[i]]) then we may read,
2779 // modify, and write overlapping words. Note that "zero stride" is unsafe
2780 // and is being handled below.
2781 bool IsReadOnlyPtr = false;
2782 Type *AccessTy = getLoadStoreType(LD);
2783 if (Seen.insert({Ptr, AccessTy}).second ||
2784 !getPtrStride(*PSE, AccessTy, Ptr, TheLoop, *DT, SymbolicStrides, false,
2785 true)) {
2786 ++NumReads;
2787 IsReadOnlyPtr = true;
2788 }
2789
2790 // See if there is an unsafe dependency between a load to a uniform address and
2791 // store to the same uniform address.
2792 if (UniformStores.contains(Ptr)) {
2793 LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
2794 "load and uniform store to the same address!\n");
2795 HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
2796 }
2797
2798 MemoryLocation Loc = MemoryLocation::get(LD);
2799 // The TBAA metadata could have a control dependency on the predication
2800 // condition, so we cannot rely on it when determining whether or not we
2801 // need runtime pointer checks.
2802 if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
2803 Loc.AATags.TBAA = nullptr;
2804
2805 // Expand forked pointers (i.e., a phi of multiple strided pointers) into
2806 // all alternatives.
2807 visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2808 [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
2809 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2810 Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
2811 });
2812 }
2813
2814 // If we write (or read-write) to a single destination and there are no other
2815 // reads in this loop then is it safe to vectorize: the vectorized stores
2816 // preserve ordering via replication or order-preserving @llvm.masked.scatter.
2817 if (NumReadWrites == 1 && NumReads == 0) {
2818 LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2819 return true;
2820 }
2821
2822 // Build dependence sets and check whether we need a runtime pointer bounds
2823 // check.
2824 Accesses.buildDependenceSets();
2825
2826 // Find pointers with computable bounds. We are going to use this information
2827 // to place a runtime bound check.
2828 Value *UncomputablePtr = nullptr;
2829 HasCompletePtrRtChecking =
2830 Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides,
2831 UncomputablePtr, AllowPartial, getDepChecker());
2832 if (!HasCompletePtrRtChecking) {
2833 const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2834 recordAnalysis("CantIdentifyArrayBounds", I)
2835 << "cannot identify array bounds";
2836 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2837 << "the array bounds.\n");
2838 return false;
2839 }
2840
2841 LLVM_DEBUG(
2842 dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2843
2844 bool DepsAreSafe = true;
2845 if (Accesses.isDependencyCheckNeeded()) {
2846 LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2847 DepsAreSafe =
2848 DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck());
2849
2850 if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeChecks()) {
2851 LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2852
2853 PtrRtChecking->reset();
2854 PtrRtChecking->Need = true;
2855
2856 UncomputablePtr = nullptr;
2857 HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT(
2858 *PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr,
2859 AllowPartial, getDepChecker());
2860
2861 // Check that we found the bounds for the pointer.
2862 if (!HasCompletePtrRtChecking) {
2863 auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2864 recordAnalysis("CantCheckMemDepsAtRunTime", I)
2865 << "cannot check memory dependencies at runtime";
2866 LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2867 return false;
2868 }
2869
2870 // Clear the dependency checks. They are no longer needed.
2871 Accesses.resetDepChecks(*DepChecker);
2872
2873 DepsAreSafe = true;
2874 }
2875 }
2876
2877 // Update the invariant address dependence flags based on dependences found
2878 // by the dep checker. Even if dependences were not recorded (too many to
2879 // track), any InvariantUnsafe dep would still have set the status to Unsafe
2880 if (const auto *Deps = DepChecker->getDependences()) {
2881 for (const auto &Dep : *Deps) {
2883 continue;
2884 Instruction *Src = Dep.getSource(*DepChecker);
2885 Instruction *Dst = Dep.getDestination(*DepChecker);
2886 if (isa<LoadInst>(Src) != isa<LoadInst>(Dst)) {
2887 HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
2888 } else {
2889 assert(isa<StoreInst>(Src) && isa<StoreInst>(Dst) &&
2890 "Expected both to be stores");
2891 HasStoreStoreDependenceInvolvingLoopInvariantAddress = true;
2892 }
2893 }
2894 }
2895
2896 if (HasConvergentOp) {
2897 recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2898 << "cannot add control dependency to convergent operation";
2899 LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2900 "would be needed with a convergent operation\n");
2901 return false;
2902 }
2903
2904 if (DepsAreSafe) {
2905 LLVM_DEBUG(
2906 dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2907 << (PtrRtChecking->Need ? "" : " don't")
2908 << " need runtime memory checks.\n");
2909 return true;
2910 }
2911
2912 emitUnsafeDependenceRemark();
2913 return false;
2914}
2915
2916void LoopAccessInfo::emitUnsafeDependenceRemark() {
2917 const auto *Deps = getDepChecker().getDependences();
2918 if (!Deps)
2919 return;
2920 const auto *Found =
2921 llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) {
2924 });
2925 if (Found == Deps->end())
2926 return;
2927 MemoryDepChecker::Dependence Dep = *Found;
2928
2929 LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2930
2931 // Emit remark for first unsafe dependence
2932 bool HasForcedDistribution = false;
2933 std::optional<const MDOperand *> Value =
2934 findStringMetadataForLoop(TheLoop, "llvm.loop.distribute.enable");
2935 if (Value) {
2936 const MDOperand *Op = *Value;
2937 assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
2938 HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
2939 }
2940
2941 const std::string Info =
2942 HasForcedDistribution
2943 ? "unsafe dependent memory operations in loop."
2944 : "unsafe dependent memory operations in loop. Use "
2945 "#pragma clang loop distribute(enable) to allow loop distribution "
2946 "to attempt to isolate the offending operations into a separate "
2947 "loop";
2948 OptimizationRemarkAnalysis &R =
2949 recordAnalysis("UnsafeDep", Dep.getDestination(getDepChecker())) << Info;
2950
2951 switch (Dep.Type) {
2955 llvm_unreachable("Unexpected dependence");
2957 R << "\nBackward loop carried data dependence.";
2958 break;
2960 R << "\nForward loop carried data dependence that prevents "
2961 "store-to-load forwarding.";
2962 break;
2964 R << "\nBackward loop carried data dependence that prevents "
2965 "store-to-load forwarding.";
2966 break;
2968 R << "\nUnsafe indirect dependence.";
2969 break;
2971 R << "\nUnsafe dependence on loop-invariant address.";
2972 break;
2974 R << "\nUnknown data dependence.";
2975 break;
2976 }
2977
2978 if (Instruction *I = Dep.getSource(getDepChecker())) {
2979 DebugLoc SourceLoc = I->getDebugLoc();
2981 SourceLoc = DD->getDebugLoc();
2982 if (SourceLoc)
2983 R << " Memory location is the same as accessed at "
2984 << ore::NV("Location", SourceLoc);
2985 }
2986}
2987
2989 const Loop *TheLoop,
2990 const DominatorTree *DT) {
2991 assert(TheLoop->contains(BB) && "Unknown block used");
2992
2993 // Blocks that do not dominate the latch need predication.
2994 const BasicBlock *Latch = TheLoop->getLoopLatch();
2995 assert(Latch && "Loop expected to have a single latch.");
2996 return !DT->dominates(BB, Latch);
2997}
2998
3000LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
3001 assert(!Report && "Multiple reports generated");
3002
3003 const BasicBlock *CodeRegion = TheLoop->getHeader();
3004 DebugLoc DL = TheLoop->getStartLoc();
3005
3006 if (I) {
3007 CodeRegion = I->getParent();
3008 // If there is no debug location attached to the instruction, revert back to
3009 // using the loop's.
3010 if (I->getDebugLoc())
3011 DL = I->getDebugLoc();
3012 }
3013
3014 Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName,
3015 DL, CodeRegion);
3016 return *Report;
3017}
3018
3020 auto *SE = PSE->getSE();
3021 if (TheLoop->isLoopInvariant(V))
3022 return true;
3023 if (!SE->isSCEVable(V->getType()))
3024 return false;
3025 const SCEV *S = SE->getSCEV(V);
3026 return SE->isLoopInvariant(S, TheLoop);
3027}
3028
3029/// If \p Ptr is a GEP, which has a loop-variant operand, return that operand.
3030/// Otherwise, return \p Ptr.
3032 Loop *Lp) {
3033 auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
3034 if (!GEP)
3035 return Ptr;
3036
3037 Value *V = Ptr;
3038 for (const Use &U : GEP->operands()) {
3039 if (!SE->isLoopInvariant(SE->getSCEV(U), Lp)) {
3040 if (V == Ptr)
3041 V = U;
3042 else
3043 // There must be exactly one loop-variant operand.
3044 return Ptr;
3045 }
3046 }
3047 return V;
3048}
3049
3050/// Get the stride of a pointer access in a loop. Looks for symbolic
3051/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
3052static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
3053 auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
3054 if (!PtrTy)
3055 return nullptr;
3056
3057 // Try to remove a gep instruction to make the pointer (actually index at this
3058 // point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the
3059 // pointer, otherwise, we are analyzing the index.
3060 Value *OrigPtr = Ptr;
3061
3062 Ptr = getLoopVariantGEPOperand(Ptr, SE, Lp);
3063 const SCEV *V = SE->getSCEV(Ptr);
3064
3065 if (Ptr != OrigPtr)
3066 // Strip off casts.
3067 while (auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
3068 V = C->getOperand();
3069
3071 return nullptr;
3072
3073 // Note that the restriction after this loop invariant check are only
3074 // profitability restrictions.
3075 if (!SE->isLoopInvariant(V, Lp))
3076 return nullptr;
3077
3078 // Look for the loop invariant symbolic value.
3079 if (isa<SCEVUnknown>(V))
3080 return V;
3081
3082 // Look through multiplies that scale a stride by a constant.
3084 if (auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
3085 if (isa<SCEVUnknown>(C->getOperand()))
3086 return V;
3087
3088 return nullptr;
3089}
3090
3091void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
3092 Value *Ptr = getLoadStorePointerOperand(MemAccess);
3093 if (!Ptr)
3094 return;
3095
3096 // Note: getStrideFromPointer is a *profitability* heuristic. We
3097 // could broaden the scope of values returned here - to anything
3098 // which happens to be loop invariant and contributes to the
3099 // computation of an interesting IV - but we chose not to as we
3100 // don't have a cost model here, and broadening the scope exposes
3101 // far too many unprofitable cases.
3102 const SCEV *StrideExpr = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
3103 if (!StrideExpr)
3104 return;
3105
3106 if (match(StrideExpr, m_scev_UndefOrPoison()))
3107 return;
3108
3109 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
3110 "versioning:");
3111 LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");
3112
3113 if (!SpeculateUnitStride) {
3114 LLVM_DEBUG(dbgs() << " Chose not to due to -laa-speculate-unit-stride\n");
3115 return;
3116 }
3117
3118 // Avoid adding the "Stride == 1" predicate when we know that
3119 // Stride >= Trip-Count. Such a predicate will effectively optimize a single
3120 // or zero iteration loop, as Trip-Count <= Stride == 1.
3121 //
3122 // TODO: We are currently not making a very informed decision on when it is
3123 // beneficial to apply stride versioning. It might make more sense that the
3124 // users of this analysis (such as the vectorizer) will trigger it, based on
3125 // their specific cost considerations; For example, in cases where stride
3126 // versioning does not help resolving memory accesses/dependences, the
3127 // vectorizer should evaluate the cost of the runtime test, and the benefit
3128 // of various possible stride specializations, considering the alternatives
3129 // of using gather/scatters (if available).
3130
3131 const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount();
3132
3133 // Match the types so we can compare the stride and the MaxBTC.
3134 // The Stride can be positive/negative, so we sign extend Stride;
3135 // The backedgeTakenCount is non-negative, so we zero extend MaxBTC.
3136 const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
3137 uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
3138 uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());
3139 const SCEV *CastedStride = StrideExpr;
3140 const SCEV *CastedBECount = MaxBTC;
3141 ScalarEvolution *SE = PSE->getSE();
3142 if (BETypeSizeBits >= StrideTypeSizeBits)
3143 CastedStride = SE->getNoopOrSignExtend(StrideExpr, MaxBTC->getType());
3144 else
3145 CastedBECount = SE->getZeroExtendExpr(MaxBTC, StrideExpr->getType());
3146 const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
3147 // Since TripCount == BackEdgeTakenCount + 1, checking:
3148 // "Stride >= TripCount" is equivalent to checking:
3149 // Stride - MaxBTC> 0
3150 if (SE->isKnownPositive(StrideMinusBETaken)) {
3151 LLVM_DEBUG(
3152 dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
3153 "Stride==1 predicate will imply that the loop executes "
3154 "at most once.\n");
3155 return;
3156 }
3157 LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
3158
3159 // Strip back off the integer cast, and check that our result is a
3160 // SCEVUnknown as we expect.
3161 const SCEV *StrideBase = StrideExpr;
3162 if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(StrideBase))
3163 StrideBase = C->getOperand();
3164 SymbolicStrides[Ptr] = cast<SCEVUnknown>(StrideBase);
3165}
3166
3168 const TargetTransformInfo *TTI,
3169 const TargetLibraryInfo *TLI, AAResults *AA,
3170 DominatorTree *DT, LoopInfo *LI,
3171 AssumptionCache *AC, bool AllowPartial)
3172 : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
3173 PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
3174 unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
3175 if (TTI && !TTI->enableScalableVectorization())
3176 // Scale the vector width by 2 as rough estimate to also consider
3177 // interleaving.
3178 MaxTargetVectorWidthInBits =
3179 TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
3180
3181 DepChecker = std::make_unique<MemoryDepChecker>(
3182 *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits, LoopGuards);
3183 PtrRtChecking =
3184 std::make_unique<RuntimePointerChecking>(*DepChecker, SE, LoopGuards);
3185 if (canAnalyzeLoop())
3186 CanVecMem = analyzeLoop(AA, LI, TLI, DT);
3187}
3188
3189void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
3190 if (CanVecMem) {
3191 OS.indent(Depth) << "Memory dependences are safe";
3192 const MemoryDepChecker &DC = getDepChecker();
3193 if (!DC.isSafeForAnyVectorWidth())
3194 OS << " with a maximum safe vector width of "
3195 << DC.getMaxSafeVectorWidthInBits() << " bits";
3198 OS << ", with a maximum safe store-load forward width of " << SLDist
3199 << " bits";
3200 }
3201 if (PtrRtChecking->Need)
3202 OS << " with run-time checks";
3203 OS << "\n";
3204 }
3205
3206 if (HasConvergentOp)
3207 OS.indent(Depth) << "Has convergent operation in loop\n";
3208
3209 if (Report)
3210 OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
3211
3212 if (auto *Dependences = DepChecker->getDependences()) {
3213 OS.indent(Depth) << "Dependences:\n";
3214 for (const auto &Dep : *Dependences) {
3215 Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
3216 OS << "\n";
3217 }
3218 } else
3219 OS.indent(Depth) << "Too many dependences, not recorded\n";
3220
3221 // List the pair of accesses need run-time checks to prove independence.
3222 PtrRtChecking->print(OS, Depth);
3223 if (PtrRtChecking->Need && !HasCompletePtrRtChecking)
3224 OS.indent(Depth) << "Generated run-time checks are incomplete\n";
3225 OS << "\n";
3226
3227 OS.indent(Depth)
3228 << "Non vectorizable stores to invariant address were "
3229 << (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||
3230 HasLoadStoreDependenceInvolvingLoopInvariantAddress
3231 ? ""
3232 : "not ")
3233 << "found in loop.\n";
3234
3235 OS.indent(Depth) << "SCEV assumptions:\n";
3236 PSE->getPredicate().print(OS, Depth);
3237
3238 OS << "\n";
3239
3240 OS.indent(Depth) << "Expressions re-written:\n";
3241 PSE->print(OS, Depth);
3242}
3243
3245 bool AllowPartial) {
3246 const auto &[It, Inserted] = LoopAccessInfoMap.try_emplace(&L);
3247
3248 // We need to create the LoopAccessInfo if either we don't already have one,
3249 // or if it was created with a different value of AllowPartial.
3250 if (Inserted || It->second->hasAllowPartial() != AllowPartial)
3251 It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
3252 &LI, AC, AllowPartial);
3253
3254 return *It->second;
3255}
3257 // Collect LoopAccessInfo entries that may keep references to IR outside the
3258 // analyzed loop or SCEVs that may have been modified or invalidated. At the
3259 // moment, that is loops requiring memory or SCEV runtime checks, as those cache
3260 // SCEVs, e.g. for pointer expressions.
3261 LoopAccessInfoMap.remove_if([](const auto &Entry) {
3262 const auto &LAI = Entry.second;
3263 return !(LAI->getRuntimePointerChecking()->getChecks().empty() &&
3264 LAI->getPSE().getPredicate().isAlwaysTrue());
3265 });
3266}
3267
3269 Function &F, const PreservedAnalyses &PA,
3270 FunctionAnalysisManager::Invalidator &Inv) {
3271 // Check whether our analysis is preserved.
3272 auto PAC = PA.getChecker<LoopAccessAnalysis>();
3273 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3274 // If not, give up now.
3275 return true;
3276
3277 // Check whether the analyses we depend on became invalid for any reason.
3278 // Skip checking TargetLibraryAnalysis as it is immutable and can't become
3279 // invalid.
3280 return Inv.invalidate<AAManager>(F, PA) ||
3281 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
3282 Inv.invalidate<LoopAnalysis>(F, PA) ||
3283 Inv.invalidate<DominatorTreeAnalysis>(F, PA);
3284}
3285
3288 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
3289 auto &AA = FAM.getResult<AAManager>(F);
3290 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
3291 auto &LI = FAM.getResult<LoopAnalysis>(F);
3292 auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
3293 auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
3294 auto &AC = FAM.getResult<AssumptionAnalysis>(F);
3295 return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC);
3296}
3297
3298AnalysisKey LoopAccessAnalysis::Key;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Forward Handle Accesses
DXIL Resource Access
dxil translate DXIL Translate Metadata
This file defines the DenseMap class.
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
#define DEBUG_TYPE
Hexagon Common GEP
#define _
This header defines various interfaces for pass management in LLVM.
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
static void findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr, SmallVectorImpl< PointerIntPair< const SCEV *, 1, bool > > &ScevList, unsigned Depth)
static const SCEV * mulSCEVNoOverflow(const SCEV *A, const SCEV *B, ScalarEvolution &SE)
Returns A * B, if it is guaranteed not to unsigned wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR, Value *Ptr, Type *AccessTy, const Loop *L, const DominatorTree &DT, std::optional< int64_t > Stride=std::nullopt, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Check whether AR is a non-wrapping AddRec.
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
static const SCEV * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
static cl::opt< ElementCount, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, const SCEV *MaxBTC, const SCEV *EltSize, ScalarEvolution &SE, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)
Return true, if evaluating AR at MaxBTC cannot wrap, because AR at MaxBTC is guaranteed inbounds of t...
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
static cl::opt< bool, true > HoistRuntimeChecks("hoist-runtime-checks", cl::Hidden, cl::desc("Hoist inner loop runtime memory checks to outer loop if possible"), cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true))
static DenseMap< const RuntimeCheckingPtrGroup *, unsigned > getPtrToIdxMap(ArrayRef< RuntimeCheckingPtrGroup > CheckingGroups)
Assign each RuntimeCheckingPtrGroup pointer an index for stable UTC output.
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, function_ref< void(Value *)> AddPointer)
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &MaxBTC, const SCEV &Dist, uint64_t MaxStride)
Given a dependence-distance Dist between two memory accesses, that have strides in the same direction...
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
static Value * getLoopVariantGEPOperand(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If Ptr is a GEP, which has a loop-variant operand, return that operand.
static cl::opt< unsigned > MaxForkedSCEVDepth("max-forked-scev-depth", cl::Hidden, cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), cl::init(5))
static cl::opt< bool > SpeculateUnitStride("laa-speculate-unit-stride", cl::Hidden, cl::desc("Speculate that non-constant strides are unit in LAA"), cl::init(true))
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
static const SCEV * addSCEVNoOverflow(const SCEV *A, const SCEV *B, ScalarEvolution &SE)
Returns A + B, if it is guaranteed not to unsigned wrap.
This header provides classes for managing per-loop analyses.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file provides utility analysis objects describing memory locations.
#define P(N)
FunctionAnalysisManager FAM
This file defines the PointerIntPair class.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static const X86InstrFMA3Group Groups[]
A manager for alias analyses.
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
APInt abs() const
Get the absolute value.
Definition APInt.h:1818
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1084
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1597
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
This templated class represents "all analyses that operate over <aparticular IR unit>" (e....
Definition Analysis.h:50
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isConvergent() const
Determine if the invoke is convergent.
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
bool isNegative() const
Definition Constants.h:214
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:126
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
iterator end()
Definition DenseMap.h:143
Analysis pass which computes a DominatorTree.
Definition Dominators.h:270
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
iterator_range< member_iterator > members(const ECValue &ECV) const
bool contains(const ElemTy &V) const
Returns true if V is contained an equivalence class.
const ECValue & insert(const ElemTy &Data)
Insert a new value into the union/find set, ignoring the request if the value already exists.
member_iterator member_end() const
const ElemTy & getLeaderValue(const ElemTy &V) const
Return the leader for the specified value that is in the set.
member_iterator findLeader(const ElemTy &V) const
Given a value in the set, return a member iterator for the equivalence class it is in.
void eraseClass(const ElemTy &V)
Erase the class containing V, i.e.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
Merge the two equivalence sets for the specified values, inserting them if they do not already exist ...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:688
bool empty() const
Definition Function.h:833
PointerType * getType() const
Global values are always pointers.
An instruction for reading from memory.
Value * getPointerOperand()
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ABI Result run(Function &F, FunctionAnalysisManager &AM)
LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
LLVM_ABI const LoopAccessInfo & getInfo(Loop &L, bool AllowPartial=false)
Drive the analysis of memory accesses in the loop.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
LLVM_ABI bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, bool AllowPartial=false)
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:587
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
std::string getLocStr() const
Return a string containing the debug location of the loop (file name + line number if present,...
Definition LoopInfo.cpp:694
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
Definition LoopInfo.cpp:592
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition LoopInfo.cpp:659
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1424
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
ArrayRef< unsigned > getOrderForAccess(Value *Ptr, bool IsWrite) const
Return the program order indices for the access location (Ptr, IsWrite).
bool isSafeForAnyStoreLoadForwardDistances() const
Return true if there are no store-load forwarding dependencies.
LLVM_ABI bool areDepsSafe(const DepCandidates &AccessSets, ArrayRef< MemAccessInfo > CheckDeps)
Check whether the dependencies between the accesses are safe, and records the dependence information ...
bool isSafeForAnyVectorWidth() const
Return true if the number of elements that are safe to operate on simultaneously is not bounded.
PointerIntPair< Value *, 1, bool > MemAccessInfo
EquivalenceClasses< MemAccessInfo > DepCandidates
Set of potential dependent memory accesses.
bool shouldRetryWithRuntimeChecks() const
In same cases when the dependency check fails we can still vectorize the loop with a dynamic array ac...
const Loop * getInnermostLoop() const
uint64_t getMaxSafeVectorWidthInBits() const
Return the number of elements that are safe to operate on simultaneously, multiplied by the size of t...
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
LLVM_ABI SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
LLVM_ABI void addAccess(StoreInst *SI)
Register the location (instructions are given increasing numbers) of a write access.
uint64_t getStoreLoadForwardSafeDistanceInBits() const
Return safe power-of-2 number of elements, which do not prevent store-load forwarding,...
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
const Value * Ptr
The address of the start of the location.
Diagnostic information for optimization analysis remarks.
PointerIntPair - This class implements a pair of a pointer and small integer.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
LLVM_ABI void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've statically proved that V doesn't wrap.
LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V, SmallVectorImpl< const SCEVPredicate * > *WrapPredsAdded=nullptr)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
LLVM_ABI void addPredicates(ArrayRef< const SCEVPredicate * > Preds)
Adds all predicates in Preds.
LLVM_ABI const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition Analysis.h:275
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
bool Need
This flag indicates if we need to add the runtime check.
void reset()
Reset the state of the pointer runtime information.
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
LLVM_ABI void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
LLVM_ABI bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
LLVM_ABI void print(raw_ostream &OS, unsigned Depth=0) const
Print the list run-time memory checks necessary.
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
static LLVM_ABI bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
LLVM_ABI void generateChecks(MemoryDepChecker::DepCandidates &DepCands)
Generate the checks and store it.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
LLVM_ABI void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze)
Insert a pointer and calculate the start and end SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
IncrementWrapFlags
Similar to SCEV::NoWrapFlags, but with slightly different semantics for FlagNUSW.
static SCEVWrapPredicate::IncrementWrapFlags clearFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, SCEVWrapPredicate::IncrementWrapFlags OffFlags)
Convenient IncrementWrapFlags manipulation methods.
static SCEVWrapPredicate::IncrementWrapFlags getImpliedFlags(const SCEVAddRecExpr *AR, ScalarEvolution &SE)
Returns the set of SCEVWrapPredicate no wrap flags implied by a SCEVAddRecExpr.
This class represents an analyzed expression in the program.
static constexpr auto NoWrapMask
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
static LLVM_ABI LoopGuards collect(const Loop *L, ScalarEvolution &SE)
Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be ...
The main scalar evolution driver.
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e.
LLVM_ABI bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI Type * getWiderType(Type *Ty1, Type *Ty2) const
LLVM_ABI const SCEV * getAbsExpr(const SCEV *Op, bool IsNSW)
LLVM_ABI bool isKnownNonPositive(const SCEV *S)
Test if the given expression is known to be non-positive.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI=nullptr)
Is operation BinOp between LHS and RHS provably does not have a signed/unsigned overflow (Signed)?
LLVM_ABI const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
APInt getSignedRangeMin(const SCEV *S)
Determine the min of the signed range for a particular SCEV.
LLVM_ABI const SCEV * getUMaxExpr(SCEVUse LHS, SCEVUse RHS)
LLVM_ABI const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
LLVM_ABI const SCEVPredicate * getWrapPredicate(const SCEVAddRecExpr *AR, SCEVWrapPredicate::IncrementWrapFlags AddedFlags)
LLVM_ABI const SCEV * getNoopOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getCouldNotCompute()
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI const SCEV * getPtrToAddrExpr(const SCEV *Op)
LLVM_ABI const SCEVAddRecExpr * convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L, SmallVectorImpl< const SCEVPredicate * > &Preds)
Tries to convert the S expression to an AddRec expression, adding additional predicates to Preds as r...
LLVM_ABI const SCEV * getSizeOfExpr(Type *IntTy, TypeSize Size)
Return an expression for a TypeSize.
LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)
Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...
LLVM_ABI const SCEV * getUMinExpr(SCEVUse LHS, SCEVUse RHS, bool Sequential=false)
LLVM_ABI const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition SmallSet.h:229
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition VectorUtils.h:76
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool *CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:909
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool match(Val *V, const Pattern &P)
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
is_undef_or_poison m_scev_UndefOrPoison()
Match an SCEVUnknown wrapping undef or poison.
specificloop_ty m_SpecificLoop(const Loop *L)
match_bind< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)
specificscev_ty m_scev_Specific(const SCEV *S)
Match if we have a specific specified SCEV.
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, bool > hasa(Y &&MD)
Check whether Metadata has a Value.
Definition Metadata.h:651
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI std::pair< const SCEV *, const SCEV * > getStartAndEndForAccess(const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap< std::pair< const SCEV *, const SCEV * >, std::pair< const SCEV *, const SCEV * > > *PointerBounds, DominatorTree *DT, AssumptionCache *AC, std::optional< ScalarEvolution::LoopGuards > &LoopGuards)
Calculate Start and End points of memory access using exact backedge taken count BTC if computable or...
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:573
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI RetainedKnowledge getKnowledgeForValue(const Value *V, ArrayRef< Attribute::AttrKind > AttrKinds, AssumptionCache &AC, function_ref< bool(RetainedKnowledge, Instruction *, const CallBase::BundleOpInfo *)> Filter=[](auto...) { return true;})
Return a valid Knowledge associated to the Value V if its Attribute kind is in AttrKinds and it match...
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
unsigned getPointerAddressSpace(const Type *T)
Definition SPIRVUtils.h:389
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI std::optional< int64_t > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...
LLVM_ABI bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
TargetTransformInfo TTI
LLVM_ABI const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
IntPtrTy
Definition InstrProf.h:82
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI std::optional< int64_t > getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy, Value *Ptr, PredicatedScalarEvolution &PSE)
If AR is an affine AddRec for Lp with a constant step, return the step in units of AccessTy's allocat...
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool ShouldCheckWrap=true, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
If the pointer has a constant stride return it in units of the access type size.
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
IR Values for the lower and upper bounds of a pointer evolution.
MDNode * Scope
The tag for alias scope specification (used with noalias).
Definition Metadata.h:786
MDNode * TBAA
The tag for type-based alias analysis.
Definition Metadata.h:780
MDNode * NoAlias
The tag specifying the noalias scope.
Definition Metadata.h:789
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
DepType Type
The type of the dependence.
unsigned Destination
Index of the destination of the dependence in the InstMap vector.
LLVM_ABI bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
LLVM_ABI bool isForward() const
Lexically forward dependence.
LLVM_ABI bool isBackward() const
Lexically backward dependence.
LLVM_ABI void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
unsigned Source
Index of the source of the dependence in the InstMap vector.
DepType
The type of the dependence.
static LLVM_ABI const char * DepName[]
String version of the types.
static LLVM_ABI VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
Represent one information held inside an operand bundle of an llvm.assume.
unsigned AddressSpace
Address space of the involved pointers.
LLVM_ABI bool addPointer(unsigned Index, const RuntimePointerChecking &RtCheck)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
LLVM_ABI RuntimeCheckingPtrGroup(unsigned Index, const RuntimePointerChecking &RtCheck)
Create a new pointer checking group containing a single pointer, with index Index in RtCheck.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
static LLVM_ABI const unsigned MaxVectorWidth
Maximum SIMD width.
static LLVM_ABI unsigned RuntimeMemoryCheckThreshold
\When performing memory disambiguation checks at runtime do not make more than this number of compari...
static LLVM_ABI bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static LLVM_ABI unsigned VectorizationInterleave
Interleave factor as overridden by the user.
static LLVM_ABI ElementCount VectorizationFactor
VF as overridden by the user.
static LLVM_ABI bool HoistRuntimeChecks
Function object to check whether the first component of a container supported by std::get (like std::...
Definition STLExtras.h:1439