LLVM 23.0.0git
LoopIdiomRecognize.cpp
Go to the documentation of this file.
1//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements an idiom recognizer that transforms simple loops into a
10// non-loop form. In cases that this kicks in, it can be a significant
11// performance win.
12//
13// If compiling for code size we avoid idiom recognition if the resulting
14// code could be larger than the code for the original loop. One way this could
15// happen is if the loop is not removable after idiom recognition due to the
16// presence of non-idiom instructions. The initial implementation of the
17// heuristics applies to idioms in multi-block loops.
18//
19//===----------------------------------------------------------------------===//
20//
21// TODO List:
22//
23// Future loop memory idioms to recognize: memcmp, etc.
24//
25// This could recognize common matrix multiplies and dot product idioms and
26// replace them with calls to BLAS (if linked in??).
27//
28//===----------------------------------------------------------------------===//
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/ArrayRef.h"
33#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/SetVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
57#include "llvm/IR/BasicBlock.h"
58#include "llvm/IR/Constant.h"
59#include "llvm/IR/Constants.h"
60#include "llvm/IR/DataLayout.h"
61#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/Dominators.h"
64#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/InstrTypes.h"
68#include "llvm/IR/Instruction.h"
71#include "llvm/IR/Intrinsics.h"
72#include "llvm/IR/LLVMContext.h"
73#include "llvm/IR/Module.h"
74#include "llvm/IR/PassManager.h"
77#include "llvm/IR/Type.h"
78#include "llvm/IR/User.h"
79#include "llvm/IR/Value.h"
80#include "llvm/IR/ValueHandle.h"
83#include "llvm/Support/Debug.h"
90#include <algorithm>
91#include <cassert>
92#include <cstdint>
93#include <utility>
94
95using namespace llvm;
96using namespace SCEVPatternMatch;
97
98#define DEBUG_TYPE "loop-idiom"
99
100STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
101STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
102STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
103STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads");
105 NumShiftUntilBitTest,
106 "Number of uncountable loops recognized as 'shift until bitttest' idiom");
107STATISTIC(NumShiftUntilZero,
108 "Number of uncountable loops recognized as 'shift until zero' idiom");
109
110namespace llvm {
113 DisableLIRPAll("disable-" DEBUG_TYPE "-all",
114 cl::desc("Options to disable Loop Idiom Recognize Pass."),
117
120 DisableLIRPMemset("disable-" DEBUG_TYPE "-memset",
121 cl::desc("Proceed with loop idiom recognize pass, but do "
122 "not convert loop(s) to memset."),
125
128 DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy",
129 cl::desc("Proceed with loop idiom recognize pass, but do "
130 "not convert loop(s) to memcpy."),
133
136 DisableLIRPStrlen("disable-loop-idiom-strlen",
137 cl::desc("Proceed with loop idiom recognize pass, but do "
138 "not convert loop(s) to strlen."),
141
144 EnableLIRPWcslen("disable-loop-idiom-wcslen",
145 cl::desc("Proceed with loop idiom recognize pass, "
146 "enable conversion of loop(s) to wcslen."),
149
152 DisableLIRPHashRecognize("disable-" DEBUG_TYPE "-hashrecognize",
153 cl::desc("Proceed with loop idiom recognize pass, "
154 "but do not optimize CRC loops."),
156 cl::init(false), cl::ReallyHidden);
157
159 "use-lir-code-size-heurs",
160 cl::desc("Use loop idiom recognition code size heuristics when compiling "
161 "with -Os/-Oz"),
162 cl::init(true), cl::Hidden);
163
165 "loop-idiom-force-memset-pattern-intrinsic",
166 cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false),
167 cl::Hidden);
168
170
171} // namespace llvm
172
173namespace {
174
175class LoopIdiomRecognize {
176 Loop *CurLoop = nullptr;
178 DominatorTree *DT;
179 LoopInfo *LI;
180 ScalarEvolution *SE;
183 const DataLayout *DL;
185 bool ApplyCodeSizeHeuristics;
186 std::unique_ptr<MemorySSAUpdater> MSSAU;
187
188public:
189 explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
190 LoopInfo *LI, ScalarEvolution *SE,
192 const TargetTransformInfo *TTI, MemorySSA *MSSA,
193 const DataLayout *DL,
195 : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
196 if (MSSA)
197 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
198 }
199
200 bool runOnLoop(Loop *L);
201
202private:
203 using StoreList = SmallVector<StoreInst *, 8>;
204 using StoreListMap = MapVector<Value *, StoreList>;
205
206 StoreListMap StoreRefsForMemset;
207 StoreListMap StoreRefsForMemsetPattern;
208 StoreList StoreRefsForMemcpy;
209 bool HasMemset;
210 bool HasMemsetPattern;
211 bool HasMemcpy;
212
213 /// Return code for isLegalStore()
214 enum LegalStoreKind {
215 None = 0,
216 Memset,
217 MemsetPattern,
218 Memcpy,
219 UnorderedAtomicMemcpy,
220 DontUse // Dummy retval never to be used. Allows catching errors in retval
221 // handling.
222 };
223
224 /// \name Countable Loop Idiom Handling
225 /// @{
226
227 bool runOnCountableLoop();
228 bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
229 SmallVectorImpl<BasicBlock *> &ExitBlocks);
230
231 void collectStores(BasicBlock *BB);
232 LegalStoreKind isLegalStore(StoreInst *SI);
233 enum class ForMemset { No, Yes };
234 bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
235 ForMemset For);
236
237 template <typename MemInst>
238 bool processLoopMemIntrinsic(
239 BasicBlock *BB,
240 bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
241 const SCEV *BECount);
242 bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
243 bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
244
245 bool processLoopStridedStore(Value *DestPtr, const SCEV *StoreSizeSCEV,
246 MaybeAlign StoreAlignment, Value *StoredVal,
247 Instruction *TheStore,
248 SmallPtrSetImpl<Instruction *> &Stores,
249 const SCEVAddRecExpr *Ev, const SCEV *BECount,
250 bool IsNegStride, bool IsLoopMemset = false);
251 bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
252 bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr,
253 const SCEV *StoreSize, MaybeAlign StoreAlign,
254 MaybeAlign LoadAlign, Instruction *TheStore,
255 Instruction *TheLoad,
256 const SCEVAddRecExpr *StoreEv,
257 const SCEVAddRecExpr *LoadEv,
258 const SCEV *BECount);
259 bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
260 bool IsLoopMemset = false);
261 bool optimizeCRCLoop(const PolynomialInfo &Info);
262
263 /// @}
264 /// \name Noncountable Loop Idiom Handling
265 /// @{
266
267 bool runOnNoncountableLoop();
268
269 bool recognizePopcount();
270 void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
271 PHINode *CntPhi, Value *Var);
272 bool isProfitableToInsertFFS(Intrinsic::ID IntrinID, Value *InitX,
273 bool ZeroCheck, size_t CanonicalSize);
274 bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX,
275 Instruction *DefX, PHINode *CntPhi,
276 Instruction *CntInst);
277 bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
278 bool recognizeShiftUntilLessThan();
279 void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
280 Instruction *CntInst, PHINode *CntPhi,
281 Value *Var, Instruction *DefX,
282 const DebugLoc &DL, bool ZeroCheck,
283 bool IsCntPhiUsedOutsideLoop,
284 bool InsertSub = false);
285
286 bool recognizeShiftUntilBitTest();
287 bool recognizeShiftUntilZero();
288 bool recognizeAndInsertStrLen();
289
290 /// @}
291};
292} // end anonymous namespace
293
296 LPMUpdater &) {
298 return PreservedAnalyses::all();
299
300 const auto *DL = &L.getHeader()->getDataLayout();
301
302 // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
303 // pass. Function analyses need to be preserved across loop transformations
304 // but ORE cannot be preserved (see comment before the pass definition).
305 OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
306
307 LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
308 AR.MSSA, DL, ORE);
309 if (!LIR.runOnLoop(&L))
310 return PreservedAnalyses::all();
311
313 if (AR.MSSA)
314 PA.preserve<MemorySSAAnalysis>();
315 return PA;
316}
317
319 I->replaceAllUsesWith(PoisonValue::get(I->getType()));
320 I->eraseFromParent();
321}
322
323//===----------------------------------------------------------------------===//
324//
325// Implementation of LoopIdiomRecognize
326//
327//===----------------------------------------------------------------------===//
328
329bool LoopIdiomRecognize::runOnLoop(Loop *L) {
330 CurLoop = L;
331 // If the loop could not be converted to canonical form, it must have an
332 // indirectbr in it, just give up.
333 if (!L->getLoopPreheader())
334 return false;
335
336 // Disable loop idiom recognition if the function's name is a common idiom.
337 StringRef Name = L->getHeader()->getParent()->getName();
338 if (Name == "memset" || Name == "memcpy" || Name == "strlen" ||
339 Name == "wcslen")
340 return false;
341
342 // Determine if code size heuristics need to be applied.
343 ApplyCodeSizeHeuristics =
344 L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
345
346 HasMemset = TLI->has(LibFunc_memset);
347 // TODO: Unconditionally enable use of the memset pattern intrinsic (or at
348 // least, opt-in via target hook) once we are confident it will never result
349 // in worse codegen than without. For now, use it only when the target
350 // supports memset_pattern16 libcall (or unless this is overridden by
351 // command line option).
352 HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
353 HasMemcpy = TLI->has(LibFunc_memcpy);
354
355 if (HasMemset || HasMemsetPattern || ForceMemsetPatternIntrinsic ||
356 HasMemcpy || !DisableLIRP::HashRecognize)
358 return runOnCountableLoop();
359
360 return runOnNoncountableLoop();
361}
362
363bool LoopIdiomRecognize::runOnCountableLoop() {
364 const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
366 "runOnCountableLoop() called on a loop without a predictable"
367 "backedge-taken count");
368
369 // If this loop executes exactly one time, then it should be peeled, not
370 // optimized by this pass.
371 if (BECount->isZero())
372 return false;
373
375 CurLoop->getUniqueExitBlocks(ExitBlocks);
376
377 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
378 << CurLoop->getHeader()->getParent()->getName()
379 << "] Countable Loop %" << CurLoop->getHeader()->getName()
380 << "\n");
381
382 // The following transforms hoist stores/memsets into the loop pre-header.
383 // Give up if the loop has instructions that may throw.
384 SimpleLoopSafetyInfo SafetyInfo;
385 SafetyInfo.computeLoopSafetyInfo(CurLoop);
386 if (SafetyInfo.anyBlockMayThrow())
387 return false;
388
389 bool MadeChange = false;
390
391 // Scan all the blocks in the loop that are not in subloops.
392 for (auto *BB : CurLoop->getBlocks()) {
393 // Ignore blocks in subloops.
394 if (LI->getLoopFor(BB) != CurLoop)
395 continue;
396
397 MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
398 }
399
400 // Optimize a CRC loop if HashRecognize found one, provided we're not
401 // optimizing for size.
402 if (!DisableLIRP::HashRecognize && !ApplyCodeSizeHeuristics)
403 if (auto Res = HashRecognize(*CurLoop, *SE).getResult())
404 optimizeCRCLoop(*Res);
405
406 return MadeChange;
407}
408
409static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
410 const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
411 return ConstStride->getAPInt();
412}
413
414/// getMemSetPatternValue - If a strided store of the specified value is safe to
415/// turn into a memset.patternn intrinsic, return the Constant that should
416/// be passed in. Otherwise, return null.
417///
418/// TODO this function could allow more constants than it does today (e.g.
419/// those over 16 bytes) now it has transitioned to being used for the
420/// memset.pattern intrinsic rather than directly the memset_pattern16
421/// libcall.
423 // FIXME: This could check for UndefValue because it can be merged into any
424 // other valid pattern.
425
426 // If the value isn't a constant, we can't promote it to being in a constant
427 // array. We could theoretically do a store to an alloca or something, but
428 // that doesn't seem worthwhile.
430 if (!C || isa<ConstantExpr>(C))
431 return nullptr;
432
433 // Only handle simple values that are a power of two bytes in size.
434 uint64_t Size = DL->getTypeSizeInBits(V->getType());
435 if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
436 return nullptr;
437
438 // Don't care enough about darwin/ppc to implement this.
439 if (DL->isBigEndian())
440 return nullptr;
441
442 // Convert to size in bytes.
443 Size /= 8;
444
445 // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
446 // if the top and bottom are the same (e.g. for vectors and large integers).
447 if (Size > 16)
448 return nullptr;
449
450 // For now, don't handle types that aren't int, floats, or pointers.
451 Type *CTy = C->getType();
452 if (!CTy->isIntOrPtrTy() && !CTy->isFloatingPointTy())
453 return nullptr;
454
455 return C;
456}
457
458LoopIdiomRecognize::LegalStoreKind
459LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
460 // Don't touch volatile stores.
461 if (SI->isVolatile())
462 return LegalStoreKind::None;
463 // We only want simple or unordered-atomic stores.
464 if (!SI->isUnordered())
465 return LegalStoreKind::None;
466
467 // Avoid merging nontemporal stores.
468 if (SI->getMetadata(LLVMContext::MD_nontemporal))
469 return LegalStoreKind::None;
470
471 Value *StoredVal = SI->getValueOperand();
472 Value *StorePtr = SI->getPointerOperand();
473
474 if (DL->hasUnstableRepresentation(StoredVal->getType()))
475 return LegalStoreKind::None;
476
477 // Transformations could invalidate the external-state pointers
478 // memcpy - LangRef specifies that a valid memcpy must preserve external
479 // state, so no transformations are blocked by it.
480 // memset - We assume that a memset of 0 has an equivalent external state
481 // effect as a null pointer store. This is currently not explicitly
482 // specified, but is true of the one exemplar we have (CHERI
483 // capabilities). All other memset formations are not safe.
484 bool MustPreserveExternalState = DL->hasExternalState(StoredVal->getType()) &&
485 !isa<ConstantPointerNull>(StoredVal);
486
487 // Reject stores that are so large that they overflow an unsigned.
488 // When storing out scalable vectors we bail out for now, since the code
489 // below currently only works for constant strides.
490 TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
491 if (SizeInBits.isScalable() || (SizeInBits.getFixedValue() & 7) ||
492 (SizeInBits.getFixedValue() >> 32) != 0)
493 return LegalStoreKind::None;
494
495 // See if the pointer expression is an AddRec like {base,+,1} on the current
496 // loop, which indicates a strided store. If we have something else, it's a
497 // random store we can't handle.
498 const SCEV *StoreEv = SE->getSCEV(StorePtr);
499 const SCEVConstant *Stride;
500 if (!match(StoreEv, m_scev_AffineAddRec(m_SCEV(), m_SCEVConstant(Stride),
501 m_SpecificLoop(CurLoop))))
502 return LegalStoreKind::None;
503
504 // See if the store can be turned into a memset.
505
506 // If the stored value is a byte-wise value (like i32 -1), then it may be
507 // turned into a memset of i8 -1, assuming that all the consecutive bytes
508 // are stored. A store of i32 0x01020304 can never be turned into a memset,
509 // but it can be turned into memset_pattern if the target supports it.
510 Value *SplatValue = isBytewiseValue(StoredVal, *DL);
511
512 // Note: memset and memset_pattern on unordered-atomic is yet not supported
513 bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
514
515 // If we're allowed to form a memset, and the stored value would be
516 // acceptable for memset, use it.
517 if (!MustPreserveExternalState && !UnorderedAtomic && HasMemset &&
518 SplatValue && !DisableLIRP::Memset &&
519 // Verify that the stored value is loop invariant. If not, we can't
520 // promote the memset.
521 CurLoop->isLoopInvariant(SplatValue)) {
522 // It looks like we can use SplatValue.
523 return LegalStoreKind::Memset;
524 }
525 if (!MustPreserveExternalState && !UnorderedAtomic &&
526 (HasMemsetPattern || ForceMemsetPatternIntrinsic) &&
528 // Don't create memset_pattern16s with address spaces.
529 StorePtr->getType()->getPointerAddressSpace() == 0 &&
530 getMemSetPatternValue(StoredVal, DL)) {
531 // It looks like we can use PatternValue!
532 return LegalStoreKind::MemsetPattern;
533 }
534
535 // Otherwise, see if the store can be turned into a memcpy.
536 if (HasMemcpy && !DisableLIRP::Memcpy) {
537 // Check to see if the stride matches the size of the store. If so, then we
538 // know that every byte is touched in the loop.
539 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
540 APInt StrideAP = Stride->getAPInt();
541 if (StoreSize != StrideAP && StoreSize != -StrideAP)
542 return LegalStoreKind::None;
543
544 // The store must be feeding a non-volatile load.
545 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
546
547 // Only allow non-volatile loads
548 if (!LI || LI->isVolatile())
549 return LegalStoreKind::None;
550 // Only allow simple or unordered-atomic loads
551 if (!LI->isUnordered())
552 return LegalStoreKind::None;
553
554 // See if the pointer expression is an AddRec like {base,+,1} on the current
555 // loop, which indicates a strided load. If we have something else, it's a
556 // random load we can't handle.
557 const SCEV *LoadEv = SE->getSCEV(LI->getPointerOperand());
558
559 // The store and load must share the same stride.
560 if (!match(LoadEv, m_scev_AffineAddRec(m_SCEV(), m_scev_Specific(Stride),
561 m_SpecificLoop(CurLoop))))
562 return LegalStoreKind::None;
563
564 // Success. This store can be converted into a memcpy.
565 UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
566 return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
567 : LegalStoreKind::Memcpy;
568 }
569 // This store can't be transformed into a memset/memcpy.
570 return LegalStoreKind::None;
571}
572
573void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
574 StoreRefsForMemset.clear();
575 StoreRefsForMemsetPattern.clear();
576 StoreRefsForMemcpy.clear();
577 for (Instruction &I : *BB) {
579 if (!SI)
580 continue;
581
582 // Make sure this is a strided store with a constant stride.
583 switch (isLegalStore(SI)) {
584 case LegalStoreKind::None:
585 // Nothing to do
586 break;
587 case LegalStoreKind::Memset: {
588 // Find the base pointer.
589 Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
590 StoreRefsForMemset[Ptr].push_back(SI);
591 } break;
592 case LegalStoreKind::MemsetPattern: {
593 // Find the base pointer.
594 Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
595 StoreRefsForMemsetPattern[Ptr].push_back(SI);
596 } break;
597 case LegalStoreKind::Memcpy:
598 case LegalStoreKind::UnorderedAtomicMemcpy:
599 StoreRefsForMemcpy.push_back(SI);
600 break;
601 default:
602 assert(false && "unhandled return value");
603 break;
604 }
605 }
606}
607
608/// runOnLoopBlock - Process the specified block, which lives in a counted loop
609/// with the specified backedge count. This block is known to be in the current
610/// loop and not in any subloops.
611bool LoopIdiomRecognize::runOnLoopBlock(
612 BasicBlock *BB, const SCEV *BECount,
613 SmallVectorImpl<BasicBlock *> &ExitBlocks) {
614 // We can only promote stores in this block if they are unconditionally
615 // executed in the loop. For a block to be unconditionally executed, it has
616 // to dominate all the exit blocks of the loop. Verify this now.
617 for (BasicBlock *ExitBlock : ExitBlocks)
618 if (!DT->dominates(BB, ExitBlock))
619 return false;
620
621 bool MadeChange = false;
622 // Look for store instructions, which may be optimized to memset/memcpy.
623 collectStores(BB);
624
625 // Look for a single store or sets of stores with a common base, which can be
626 // optimized into a memset (memset_pattern). The latter most commonly happens
627 // with structs and handunrolled loops.
628 for (auto &SL : StoreRefsForMemset)
629 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
630
631 for (auto &SL : StoreRefsForMemsetPattern)
632 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
633
634 // Optimize the store into a memcpy, if it feeds an similarly strided load.
635 for (auto &SI : StoreRefsForMemcpy)
636 MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
637
638 MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
639 BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
640 MadeChange |= processLoopMemIntrinsic<MemSetInst>(
641 BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
642
643 return MadeChange;
644}
645
646/// See if this store(s) can be promoted to a memset.
647bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
648 const SCEV *BECount, ForMemset For) {
649 // Try to find consecutive stores that can be transformed into memsets.
650 SetVector<StoreInst *> Heads, Tails;
652
653 // Do a quadratic search on all of the given stores and find
654 // all of the pairs of stores that follow each other.
655 SmallVector<unsigned, 16> IndexQueue;
656 for (unsigned i = 0, e = SL.size(); i < e; ++i) {
657 assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
658
659 Value *FirstStoredVal = SL[i]->getValueOperand();
660 Value *FirstStorePtr = SL[i]->getPointerOperand();
661 const SCEVAddRecExpr *FirstStoreEv =
662 cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
663 APInt FirstStride = getStoreStride(FirstStoreEv);
664 unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
665
666 // See if we can optimize just this store in isolation.
667 if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
668 Heads.insert(SL[i]);
669 continue;
670 }
671
672 Value *FirstSplatValue = nullptr;
673 Constant *FirstPatternValue = nullptr;
674
675 if (For == ForMemset::Yes)
676 FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
677 else
678 FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
679
680 assert((FirstSplatValue || FirstPatternValue) &&
681 "Expected either splat value or pattern value.");
682
683 IndexQueue.clear();
684 // If a store has multiple consecutive store candidates, search Stores
685 // array according to the sequence: from i+1 to e, then from i-1 to 0.
686 // This is because usually pairing with immediate succeeding or preceding
687 // candidate create the best chance to find memset opportunity.
688 unsigned j = 0;
689 for (j = i + 1; j < e; ++j)
690 IndexQueue.push_back(j);
691 for (j = i; j > 0; --j)
692 IndexQueue.push_back(j - 1);
693
694 for (auto &k : IndexQueue) {
695 assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
696 Value *SecondStorePtr = SL[k]->getPointerOperand();
697 const SCEVAddRecExpr *SecondStoreEv =
698 cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
699 APInt SecondStride = getStoreStride(SecondStoreEv);
700
701 if (FirstStride != SecondStride)
702 continue;
703
704 Value *SecondStoredVal = SL[k]->getValueOperand();
705 Value *SecondSplatValue = nullptr;
706 Constant *SecondPatternValue = nullptr;
707
708 if (For == ForMemset::Yes)
709 SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
710 else
711 SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
712
713 assert((SecondSplatValue || SecondPatternValue) &&
714 "Expected either splat value or pattern value.");
715
716 if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
717 if (For == ForMemset::Yes) {
718 if (isa<UndefValue>(FirstSplatValue))
719 FirstSplatValue = SecondSplatValue;
720 if (FirstSplatValue != SecondSplatValue)
721 continue;
722 } else {
723 if (isa<UndefValue>(FirstPatternValue))
724 FirstPatternValue = SecondPatternValue;
725 if (FirstPatternValue != SecondPatternValue)
726 continue;
727 }
728 Tails.insert(SL[k]);
729 Heads.insert(SL[i]);
730 ConsecutiveChain[SL[i]] = SL[k];
731 break;
732 }
733 }
734 }
735
736 // We may run into multiple chains that merge into a single chain. We mark the
737 // stores that we transformed so that we don't visit the same store twice.
738 SmallPtrSet<Value *, 16> TransformedStores;
739 bool Changed = false;
740
741 // For stores that start but don't end a link in the chain:
742 for (StoreInst *I : Heads) {
743 if (Tails.count(I))
744 continue;
745
746 // We found a store instr that starts a chain. Now follow the chain and try
747 // to transform it.
748 SmallPtrSet<Instruction *, 8> AdjacentStores;
749 StoreInst *HeadStore = I;
750 unsigned StoreSize = 0;
751
752 // Collect the chain into a list.
753 while (Tails.count(I) || Heads.count(I)) {
754 if (TransformedStores.count(I))
755 break;
756 AdjacentStores.insert(I);
757
758 StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
759 // Move to the next value in the chain.
760 I = ConsecutiveChain[I];
761 }
762
763 Value *StoredVal = HeadStore->getValueOperand();
764 Value *StorePtr = HeadStore->getPointerOperand();
765 const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
766 APInt Stride = getStoreStride(StoreEv);
767
768 // Check to see if the stride matches the size of the stores. If so, then
769 // we know that every byte is touched in the loop.
770 if (StoreSize != Stride && StoreSize != -Stride)
771 continue;
772
773 bool IsNegStride = StoreSize == -Stride;
774
775 Type *IntIdxTy = DL->getIndexType(StorePtr->getType());
776 const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, StoreSize);
777 if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
778 MaybeAlign(HeadStore->getAlign()), StoredVal,
779 HeadStore, AdjacentStores, StoreEv, BECount,
780 IsNegStride)) {
781 TransformedStores.insert_range(AdjacentStores);
782 Changed = true;
783 }
784 }
785
786 return Changed;
787}
788
789/// processLoopMemIntrinsic - Template function for calling different processor
790/// functions based on mem intrinsic type.
791template <typename MemInst>
792bool LoopIdiomRecognize::processLoopMemIntrinsic(
793 BasicBlock *BB,
794 bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
795 const SCEV *BECount) {
796 bool MadeChange = false;
797 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
798 Instruction *Inst = &*I++;
799 // Look for memory instructions, which may be optimized to a larger one.
800 if (MemInst *MI = dyn_cast<MemInst>(Inst)) {
801 WeakTrackingVH InstPtr(&*I);
802 if (!(this->*Processor)(MI, BECount))
803 continue;
804 MadeChange = true;
805
806 // If processing the instruction invalidated our iterator, start over from
807 // the top of the block.
808 if (!InstPtr)
809 I = BB->begin();
810 }
811 }
812 return MadeChange;
813}
814
815/// processLoopMemCpy - See if this memcpy can be promoted to a large memcpy
816bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
817 const SCEV *BECount) {
818 // We can only handle non-volatile memcpys with a constant size.
819 if (MCI->isVolatile() || !isa<ConstantInt>(MCI->getLength()))
820 return false;
821
822 // If we're not allowed to hack on memcpy, we fail.
823 if ((!HasMemcpy && !MCI->isForceInlined()) || DisableLIRP::Memcpy)
824 return false;
825
826 Value *Dest = MCI->getDest();
827 Value *Source = MCI->getSource();
828 if (!Dest || !Source)
829 return false;
830
831 // See if the load and store pointer expressions are AddRec like {base,+,1} on
832 // the current loop, which indicates a strided load and store. If we have
833 // something else, it's a random load or store we can't handle.
834 const SCEV *StoreEv = SE->getSCEV(Dest);
835 const SCEV *LoadEv = SE->getSCEV(Source);
836 const APInt *StoreStrideValue, *LoadStrideValue;
837 if (!match(StoreEv,
838 m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(StoreStrideValue),
839 m_SpecificLoop(CurLoop))) ||
840 !match(LoadEv,
841 m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(LoadStrideValue),
842 m_SpecificLoop(CurLoop))))
843 return false;
844
845 // Reject memcpys that are so large that they overflow an unsigned.
846 uint64_t SizeInBytes = cast<ConstantInt>(MCI->getLength())->getZExtValue();
847 if ((SizeInBytes >> 32) != 0)
848 return false;
849
850 // Huge stride value - give up
851 if (StoreStrideValue->getBitWidth() > 64 ||
852 LoadStrideValue->getBitWidth() > 64)
853 return false;
854
855 if (SizeInBytes != *StoreStrideValue && SizeInBytes != -*StoreStrideValue) {
856 ORE.emit([&]() {
857 return OptimizationRemarkMissed(DEBUG_TYPE, "SizeStrideUnequal", MCI)
858 << ore::NV("Inst", "memcpy") << " in "
859 << ore::NV("Function", MCI->getFunction())
860 << " function will not be hoisted: "
861 << ore::NV("Reason", "memcpy size is not equal to stride");
862 });
863 return false;
864 }
865
866 int64_t StoreStrideInt = StoreStrideValue->getSExtValue();
867 int64_t LoadStrideInt = LoadStrideValue->getSExtValue();
868 // Check if the load stride matches the store stride.
869 if (StoreStrideInt != LoadStrideInt)
870 return false;
871
872 return processLoopStoreOfLoopLoad(
873 Dest, Source, SE->getConstant(Dest->getType(), SizeInBytes),
874 MCI->getDestAlign(), MCI->getSourceAlign(), MCI, MCI,
875 cast<SCEVAddRecExpr>(StoreEv), cast<SCEVAddRecExpr>(LoadEv), BECount);
876}
877
878/// processLoopMemSet - See if this memset can be promoted to a large memset.
879bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
880 const SCEV *BECount) {
881 // We can only handle non-volatile memsets.
882 if (MSI->isVolatile())
883 return false;
884
885 // If we're not allowed to hack on memset, we fail.
886 if (!HasMemset || DisableLIRP::Memset)
887 return false;
888
889 Value *Pointer = MSI->getDest();
890
891 // See if the pointer expression is an AddRec like {base,+,1} on the current
892 // loop, which indicates a strided store. If we have something else, it's a
893 // random store we can't handle.
894 const SCEV *Ev = SE->getSCEV(Pointer);
895 const SCEV *PointerStrideSCEV;
896 if (!match(Ev, m_scev_AffineAddRec(m_SCEV(), m_SCEV(PointerStrideSCEV),
897 m_SpecificLoop(CurLoop)))) {
898 LLVM_DEBUG(dbgs() << " Pointer is not affine, abort\n");
899 return false;
900 }
901
902 SCEVUse MemsetSizeSCEV = SE->getSCEV(MSI->getLength());
903
904 bool IsNegStride = false;
905 const bool IsConstantSize = isa<ConstantInt>(MSI->getLength());
906
907 if (IsConstantSize) {
908 // Memset size is constant.
909 // Check if the pointer stride matches the memset size. If so, then
910 // we know that every byte is touched in the loop.
911 LLVM_DEBUG(dbgs() << " memset size is constant\n");
912 uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
913 const APInt *Stride;
914 if (!match(PointerStrideSCEV, m_scev_APInt(Stride)))
915 return false;
916
917 if (SizeInBytes != *Stride && SizeInBytes != -*Stride)
918 return false;
919
920 IsNegStride = SizeInBytes == -*Stride;
921 } else {
922 // Memset size is non-constant.
923 // Check if the pointer stride matches the memset size.
924 // To be conservative, the pass would not promote pointers that aren't in
925 // address space zero. Also, the pass only handles memset length and stride
926 // that are invariant for the top level loop.
927 LLVM_DEBUG(dbgs() << " memset size is non-constant\n");
928 if (Pointer->getType()->getPointerAddressSpace() != 0) {
929 LLVM_DEBUG(dbgs() << " pointer is not in address space zero, "
930 << "abort\n");
931 return false;
932 }
933 if (!SE->isLoopInvariant(MemsetSizeSCEV, CurLoop)) {
934 LLVM_DEBUG(dbgs() << " memset size is not a loop-invariant, "
935 << "abort\n");
936 return false;
937 }
938
939 // Compare positive direction PointerStrideSCEV with MemsetSizeSCEV
940 IsNegStride = PointerStrideSCEV->isNonConstantNegative();
941 SCEVUse PositiveStrideSCEV =
942 IsNegStride ? SCEVUse(SE->getNegativeSCEV(PointerStrideSCEV))
943 : SCEVUse(PointerStrideSCEV);
944 LLVM_DEBUG(dbgs() << " MemsetSizeSCEV: " << *MemsetSizeSCEV << "\n"
945 << " PositiveStrideSCEV: " << *PositiveStrideSCEV
946 << "\n");
947
948 if (PositiveStrideSCEV != MemsetSizeSCEV) {
949 // If an expression is covered by the loop guard, compare again and
950 // proceed with optimization if equal.
951 const SCEV *FoldedPositiveStride =
952 SE->applyLoopGuards(PositiveStrideSCEV, CurLoop);
953 const SCEV *FoldedMemsetSize =
954 SE->applyLoopGuards(MemsetSizeSCEV, CurLoop);
955
956 LLVM_DEBUG(dbgs() << " Try to fold SCEV based on loop guard\n"
957 << " FoldedMemsetSize: " << *FoldedMemsetSize << "\n"
958 << " FoldedPositiveStride: " << *FoldedPositiveStride
959 << "\n");
960
961 if (FoldedPositiveStride != FoldedMemsetSize) {
962 LLVM_DEBUG(dbgs() << " SCEV don't match, abort\n");
963 return false;
964 }
965 }
966 }
967
968 // Verify that the memset value is loop invariant. If not, we can't promote
969 // the memset.
970 Value *SplatValue = MSI->getValue();
971 if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
972 return false;
973
975 MSIs.insert(MSI);
976 return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()),
977 MSI->getDestAlign(), SplatValue, MSI, MSIs,
978 cast<SCEVAddRecExpr>(Ev), BECount, IsNegStride,
979 /*IsLoopMemset=*/true);
980}
981
982/// Return true if \p I is a (simple, loop-invariant-valued) store of the same
983/// bytewise value \p SplatByte.
984static bool isSameByteValueStore(Instruction &I, Value *SplatByte, Loop *L,
985 const DataLayout &DL) {
986 assert(SplatByte && "expected a bytewise splat value to match against");
987 auto *SI = dyn_cast<StoreInst>(&I);
988 if (!SI || !SI->isSimple() || !L->isLoopInvariant(SI->getValueOperand()))
989 return false;
990 return isBytewiseValue(SI->getValueOperand(), DL) == SplatByte;
991}
992
993/// mayLoopAccessLocation - Return true if the specified loop might access the
994/// specified pointer location, which is a loop-strided access. The 'Access'
995/// argument specifies what the verboten forms of access are (read or write).
996///
997/// When the access size cannot be bounded, fall back to allow stores writing
998/// the same byte value \p SplatByte.
1000 const SCEV *BECount,
1001 const SCEV *StoreSizeSCEV, AliasAnalysis &AA,
1002 SmallPtrSetImpl<Instruction *> &IgnoredInsts,
1003 Value *SplatByte = nullptr,
1004 const DataLayout *DL = nullptr) {
1005 // Get the location that may be stored across the loop. Since the access is
1006 // strided positively through memory, we say that the modified location starts
1007 // at the pointer and has infinite size.
1009
1010 // If the loop iterates a fixed number of times, we can refine the access size
1011 // to be exactly the size of the memset, which is (BECount+1)*StoreSize
1012 const APInt *BECst, *ConstSize;
1013 if (match(BECount, m_scev_APInt(BECst)) &&
1014 match(StoreSizeSCEV, m_scev_APInt(ConstSize))) {
1015 std::optional<uint64_t> BEInt = BECst->tryZExtValue();
1016 std::optional<uint64_t> SizeInt = ConstSize->tryZExtValue();
1017 // FIXME: Should this check for overflow?
1018 if (BEInt && SizeInt)
1019 AccessSize = LocationSize::precise((*BEInt + 1) * *SizeInt);
1020 }
1021
1022 // TODO: For this to be really effective, we have to dive into the pointer
1023 // operand in the store. Store to &A[i] of 100 will always return may alias
1024 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
1025 // which will then no-alias a store to &A[100].
1026 MemoryLocation StoreLoc(Ptr, AccessSize);
1027
1028 // Only consult the same-byte-value fallback when the access size stayed
1029 // infinite (non-constant trip count); with a precise size AA is accurate.
1030 bool TrySameByteValue = !AccessSize.isPrecise() && SplatByte && DL;
1031
1032 for (BasicBlock *B : L->blocks())
1033 for (Instruction &I : *B)
1034 if (!IgnoredInsts.contains(&I) &&
1035 isModOrRefSet(AA.getModRefInfo(&I, StoreLoc) & Access)) {
1036 if (TrySameByteValue && isSameByteValueStore(I, SplatByte, L, *DL))
1037 continue;
1038 return true;
1039 }
1040 return false;
1041}
1042
1043// If we have a negative stride, Start refers to the end of the memory location
1044// we're trying to memset. Therefore, we need to recompute the base pointer,
1045// which is just Start - BECount*Size.
1046static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
1047 Type *IntPtr, const SCEV *StoreSizeSCEV,
1048 ScalarEvolution *SE) {
1049 const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
1050 if (!StoreSizeSCEV->isOne()) {
1051 // index = back edge count * store size
1052 Index = SE->getMulExpr(Index,
1053 SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
1055 }
1056 // base pointer = start - index * store size
1057 return SE->getMinusSCEV(Start, Index);
1058}
1059
1060/// Compute the number of bytes as a SCEV from the backedge taken count.
1061///
1062/// This also maps the SCEV into the provided type and tries to handle the
1063/// computation in a way that will fold cleanly.
1064static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
1065 const SCEV *StoreSizeSCEV, Loop *CurLoop,
1066 const DataLayout *DL, ScalarEvolution *SE) {
1067 const SCEV *TripCountSCEV =
1068 SE->getTripCountFromExitCount(BECount, IntPtr, CurLoop);
1069 return SE->getMulExpr(TripCountSCEV,
1070 SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
1072}
1073
1074/// processLoopStridedStore - We see a strided store of some value. If we can
1075/// transform this into a memset or memset_pattern in the loop preheader, do so.
1076bool LoopIdiomRecognize::processLoopStridedStore(
1077 Value *DestPtr, const SCEV *StoreSizeSCEV, MaybeAlign StoreAlignment,
1078 Value *StoredVal, Instruction *TheStore,
1080 const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {
1081 Module *M = TheStore->getModule();
1082
1083 // The trip count of the loop and the base pointer of the addrec SCEV is
1084 // guaranteed to be loop invariant, which means that it should dominate the
1085 // header. This allows us to insert code for it in the preheader.
1086 unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
1087 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1088 IRBuilder<> Builder(Preheader->getTerminator());
1089 SCEVExpander Expander(*SE, "loop-idiom");
1090 SCEVExpanderCleaner ExpCleaner(Expander);
1091
1092 Type *DestInt8PtrTy = Builder.getPtrTy(DestAS);
1093 Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
1094
1095 bool Changed = false;
1096 const SCEV *Start = Ev->getStart();
1097 // Handle negative strided loops.
1098 if (IsNegStride)
1099 Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSizeSCEV, SE);
1100
1101 // TODO: ideally we should still be able to generate memset if SCEV expander
1102 // is taught to generate the dependencies at the latest point.
1103 if (!Expander.isSafeToExpand(Start))
1104 return Changed;
1105
1106 // Okay, we have a strided store "p[i]" of a splattable value. We can turn
1107 // this into a memset in the loop preheader now if we want. However, this
1108 // would be unsafe to do if there is anything else in the loop that may read
1109 // or write to the aliased location. Check for any overlap by generating the
1110 // base pointer and checking the region.
1111 Value *BasePtr =
1112 Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
1113
1114 // From here on out, conservatively report to the pass manager that we've
1115 // changed the IR, even if we later clean up these added instructions. There
1116 // may be structural differences e.g. in the order of use lists not accounted
1117 // for in just a textual dump of the IR. This is written as a variable, even
1118 // though statically all the places this dominates could be replaced with
1119 // 'true', with the hope that anyone trying to be clever / "more precise" with
1120 // the return value will read this comment, and leave them alone.
1121 Changed = true;
1122
1123 Value *SplatValue = isBytewiseValue(StoredVal, *DL);
1124 if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1125 StoreSizeSCEV, *AA, Stores, SplatValue, DL))
1126 return Changed;
1127
1128 if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
1129 return Changed;
1130
1131 // Okay, everything looks good, insert the memset.
1132 Constant *PatternValue = nullptr;
1133 if (!SplatValue)
1134 PatternValue = getMemSetPatternValue(StoredVal, DL);
1135
1136 // MemsetArg is the number of bytes for the memset libcall, and the number
1137 // of pattern repetitions if the memset.pattern intrinsic is being used.
1138 Value *MemsetArg;
1139 std::optional<int64_t> BytesWritten;
1140
1141 if (PatternValue && (HasMemsetPattern || ForceMemsetPatternIntrinsic)) {
1142 const SCEV *TripCountS =
1143 SE->getTripCountFromExitCount(BECount, IntIdxTy, CurLoop);
1144 if (!Expander.isSafeToExpand(TripCountS))
1145 return Changed;
1146 const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
1147 if (!ConstStoreSize)
1148 return Changed;
1149 Value *TripCount = Expander.expandCodeFor(TripCountS, IntIdxTy,
1150 Preheader->getTerminator());
1151 uint64_t PatternRepsPerTrip =
1152 (ConstStoreSize->getValue()->getZExtValue() * 8) /
1153 DL->getTypeSizeInBits(PatternValue->getType());
1154 // If ConstStoreSize is not equal to the width of PatternValue, then
1155 // MemsetArg is TripCount * (ConstStoreSize/PatternValueWidth). Else
1156 // MemSetArg is just TripCount.
1157 MemsetArg =
1158 PatternRepsPerTrip == 1
1159 ? TripCount
1160 : Builder.CreateMul(TripCount,
1161 Builder.getIntN(IntIdxTy->getIntegerBitWidth(),
1162 PatternRepsPerTrip));
1163 if (auto *CI = dyn_cast<ConstantInt>(TripCount))
1164 BytesWritten =
1165 CI->getZExtValue() * ConstStoreSize->getValue()->getZExtValue();
1166
1167 } else {
1168 const SCEV *NumBytesS =
1169 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
1170
1171 // TODO: ideally we should still be able to generate memset if SCEV expander
1172 // is taught to generate the dependencies at the latest point.
1173 if (!Expander.isSafeToExpand(NumBytesS))
1174 return Changed;
1175 MemsetArg =
1176 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
1177 if (auto *CI = dyn_cast<ConstantInt>(MemsetArg))
1178 BytesWritten = CI->getZExtValue();
1179 }
1180 assert(MemsetArg && "MemsetArg should have been set");
1181
1182 AAMDNodes AATags = TheStore->getAAMetadata();
1183 for (Instruction *Store : Stores)
1184 AATags = AATags.merge(Store->getAAMetadata());
1185 if (BytesWritten)
1186 AATags = AATags.extendTo(BytesWritten.value());
1187 else
1188 AATags = AATags.extendTo(-1);
1189
1190 CallInst *NewCall;
1191 if (SplatValue) {
1192 NewCall = Builder.CreateMemSet(BasePtr, SplatValue, MemsetArg,
1193 MaybeAlign(StoreAlignment),
1194 /*isVolatile=*/false, AATags);
1195 } else if (ForceMemsetPatternIntrinsic ||
1196 isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) {
1197 assert(isa<SCEVConstant>(StoreSizeSCEV) && "Expected constant store size");
1198
1199 NewCall = Builder.CreateIntrinsicWithoutFolding(
1200 Intrinsic::experimental_memset_pattern,
1201 {DestInt8PtrTy, PatternValue->getType(), IntIdxTy},
1202 {BasePtr, PatternValue, MemsetArg,
1203 ConstantInt::getFalse(M->getContext())});
1204 if (StoreAlignment)
1205 cast<MemSetPatternInst>(NewCall)->setDestAlignment(*StoreAlignment);
1206 NewCall->setAAMetadata(AATags);
1207 } else {
1208 // Neither a memset, nor memset_pattern16
1209 return Changed;
1210 }
1211
1212 NewCall->setDebugLoc(TheStore->getDebugLoc());
1213
1214 if (MSSAU) {
1215 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1216 NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
1217 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1218 }
1219
1220 LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
1221 << " from store to: " << *Ev << " at: " << *TheStore
1222 << "\n");
1223
1224 ORE.emit([&]() {
1225 OptimizationRemark R(DEBUG_TYPE, "ProcessLoopStridedStore",
1226 NewCall->getDebugLoc(), Preheader);
1227 R << "Transformed loop-strided store in "
1228 << ore::NV("Function", TheStore->getFunction())
1229 << " function into a call to "
1230 << ore::NV("NewFunction", NewCall->getCalledFunction())
1231 << "() intrinsic";
1232 if (!Stores.empty())
1233 R << ore::setExtraArgs();
1234 for (auto *I : Stores) {
1235 R << ore::NV("FromBlock", I->getParent()->getName())
1236 << ore::NV("ToBlock", Preheader->getName());
1237 }
1238 return R;
1239 });
1240
1241 // Okay, the memset has been formed. Zap the original store and anything that
1242 // feeds into it.
1243 for (auto *I : Stores) {
1244 if (MSSAU)
1245 MSSAU->removeMemoryAccess(I, true);
1247 }
1248 if (MSSAU && VerifyMemorySSA)
1249 MSSAU->getMemorySSA()->verifyMemorySSA();
1250 ++NumMemSet;
1251 ExpCleaner.markResultUsed();
1252 return true;
1253}
1254
1255/// If the stored value is a strided load in the same loop with the same stride
1256/// this may be transformable into a memcpy. This kicks in for stuff like
1257/// for (i) A[i] = B[i];
1258bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
1259 const SCEV *BECount) {
1260 assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
1261
1262 Value *StorePtr = SI->getPointerOperand();
1263 const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1264 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1265
1266 // The store must be feeding a non-volatile load.
1267 LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
1268 assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
1269
1270 // See if the pointer expression is an AddRec like {base,+,1} on the current
1271 // loop, which indicates a strided load. If we have something else, it's a
1272 // random load we can't handle.
1273 Value *LoadPtr = LI->getPointerOperand();
1274 const SCEVAddRecExpr *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1275
1276 const SCEV *StoreSizeSCEV = SE->getConstant(StorePtr->getType(), StoreSize);
1277 return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
1278 SI->getAlign(), LI->getAlign(), SI, LI,
1279 StoreEv, LoadEv, BECount);
1280}
1281
1282namespace {
1283class MemmoveVerifier {
1284public:
1285 explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
1286 const DataLayout &DL)
1288 LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
1290 StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
1291 IsSameObject(BP1 == BP2) {}
1292
1293 bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,
1294 const Instruction &TheLoad,
1295 bool IsMemCpy) const {
1296 if (IsMemCpy) {
1297 // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
1298 // for negative stride.
1299 if ((!IsNegStride && LoadOff <= StoreOff) ||
1300 (IsNegStride && LoadOff >= StoreOff))
1301 return false;
1302 } else {
1303 // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
1304 // for negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
1305 int64_t LoadSize =
1306 DL.getTypeSizeInBits(TheLoad.getType()).getFixedValue() / 8;
1307 if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
1308 return false;
1309 if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
1310 (IsNegStride && LoadOff + LoadSize > StoreOff))
1311 return false;
1312 }
1313 return true;
1314 }
1315
1316private:
1317 const DataLayout &DL;
1318 int64_t LoadOff = 0;
1319 int64_t StoreOff = 0;
1320 const Value *BP1;
1321 const Value *BP2;
1322
1323public:
1324 const bool IsSameObject;
1325};
1326} // namespace
1327
1328bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
1329 Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,
1330 MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
1331 Instruction *TheLoad, const SCEVAddRecExpr *StoreEv,
1332 const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {
1333
1334 // FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to
1335 // conservatively bail here, since otherwise we may have to transform
1336 // llvm.memcpy.inline into llvm.memcpy which is illegal.
1337 if (auto *MCI = dyn_cast<MemCpyInst>(TheStore); MCI && MCI->isForceInlined())
1338 return false;
1339
1340 // The trip count of the loop and the base pointer of the addrec SCEV is
1341 // guaranteed to be loop invariant, which means that it should dominate the
1342 // header. This allows us to insert code for it in the preheader.
1343 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1344 IRBuilder<> Builder(Preheader->getTerminator());
1345 SCEVExpander Expander(*SE, "loop-idiom");
1346
1347 SCEVExpanderCleaner ExpCleaner(Expander);
1348
1349 bool Changed = false;
1350 const SCEV *StrStart = StoreEv->getStart();
1351 unsigned StrAS = DestPtr->getType()->getPointerAddressSpace();
1352 Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
1353
1354 APInt Stride = getStoreStride(StoreEv);
1355 const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
1356
1357 // TODO: Deal with non-constant size; Currently expect constant store size
1358 assert(ConstStoreSize && "store size is expected to be a constant");
1359
1360 int64_t StoreSize = ConstStoreSize->getValue()->getZExtValue();
1361 bool IsNegStride = StoreSize == -Stride;
1362
1363 // Handle negative strided loops.
1364 if (IsNegStride)
1365 StrStart =
1366 getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSizeSCEV, SE);
1367
1368 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
1369 // this into a memcpy in the loop preheader now if we want. However, this
1370 // would be unsafe to do if there is anything else in the loop that may read
1371 // or write the memory region we're storing to. This includes the load that
1372 // feeds the stores. Check for an alias by generating the base address and
1373 // checking everything.
1374 Value *StoreBasePtr = Expander.expandCodeFor(
1375 StrStart, Builder.getPtrTy(StrAS), Preheader->getTerminator());
1376
1377 // From here on out, conservatively report to the pass manager that we've
1378 // changed the IR, even if we later clean up these added instructions. There
1379 // may be structural differences e.g. in the order of use lists not accounted
1380 // for in just a textual dump of the IR. This is written as a variable, even
1381 // though statically all the places this dominates could be replaced with
1382 // 'true', with the hope that anyone trying to be clever / "more precise" with
1383 // the return value will read this comment, and leave them alone.
1384 Changed = true;
1385
1386 SmallPtrSet<Instruction *, 2> IgnoredInsts;
1387 IgnoredInsts.insert(TheStore);
1388
1389 bool IsMemCpy = isa<MemCpyInst>(TheStore);
1390 const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
1391
1392 bool LoopAccessStore =
1393 mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1394 StoreSizeSCEV, *AA, IgnoredInsts);
1395 if (LoopAccessStore) {
1396 // For memmove case it's not enough to guarantee that loop doesn't access
1397 // TheStore and TheLoad. Additionally we need to make sure that TheStore is
1398 // the only user of TheLoad.
1399 if (!TheLoad->hasOneUse())
1400 return Changed;
1401 IgnoredInsts.insert(TheLoad);
1402 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
1403 BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
1404 ORE.emit([&]() {
1405 return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
1406 TheStore)
1407 << ore::NV("Inst", InstRemark) << " in "
1408 << ore::NV("Function", TheStore->getFunction())
1409 << " function will not be hoisted: "
1410 << ore::NV("Reason", "The loop may access store location");
1411 });
1412 return Changed;
1413 }
1414 IgnoredInsts.erase(TheLoad);
1415 }
1416
1417 const SCEV *LdStart = LoadEv->getStart();
1418 unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace();
1419
1420 // Handle negative strided loops.
1421 if (IsNegStride)
1422 LdStart =
1423 getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSizeSCEV, SE);
1424
1425 // For a memcpy, we have to make sure that the input array is not being
1426 // mutated by the loop.
1427 Value *LoadBasePtr = Expander.expandCodeFor(LdStart, Builder.getPtrTy(LdAS),
1428 Preheader->getTerminator());
1429
1430 // If the store is a memcpy instruction, we must check if it will write to
1431 // the load memory locations. So remove it from the ignored stores.
1432 MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
1433 if (IsMemCpy && !Verifier.IsSameObject)
1434 IgnoredInsts.erase(TheStore);
1435 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
1436 StoreSizeSCEV, *AA, IgnoredInsts)) {
1437 ORE.emit([&]() {
1438 return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
1439 << ore::NV("Inst", InstRemark) << " in "
1440 << ore::NV("Function", TheStore->getFunction())
1441 << " function will not be hoisted: "
1442 << ore::NV("Reason", "The loop may access load location");
1443 });
1444 return Changed;
1445 }
1446
1447 bool IsAtomic = TheStore->isAtomic() || TheLoad->isAtomic();
1448 bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
1449
1450 if (IsAtomic) {
1451 // For now don't support unordered atomic memmove.
1452 if (UseMemMove)
1453 return Changed;
1454
1455 // We cannot allow unaligned ops for unordered load/store, so reject
1456 // anything where the alignment isn't at least the element size.
1457 assert((StoreAlign && LoadAlign) &&
1458 "Expect unordered load/store to have align.");
1459 if (*StoreAlign < StoreSize || *LoadAlign < StoreSize)
1460 return Changed;
1461
1462 // If the element.atomic memcpy is not lowered into explicit
1463 // loads/stores later, then it will be lowered into an element-size
1464 // specific lib call. If the lib call doesn't exist for our store size, then
1465 // we shouldn't generate the memcpy.
1466 if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
1467 return Changed;
1468 }
1469
1470 if (UseMemMove)
1471 if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
1472 IsMemCpy))
1473 return Changed;
1474
1475 if (avoidLIRForMultiBlockLoop())
1476 return Changed;
1477
1478 // Okay, everything is safe, we can transform this!
1479
1480 const SCEV *NumBytesS =
1481 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
1482
1483 Value *NumBytes =
1484 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
1485
1486 AAMDNodes AATags = TheLoad->getAAMetadata();
1487 AAMDNodes StoreAATags = TheStore->getAAMetadata();
1488 AATags = AATags.merge(StoreAATags);
1489 if (auto CI = dyn_cast<ConstantInt>(NumBytes))
1490 AATags = AATags.extendTo(CI->getZExtValue());
1491 else
1492 AATags = AATags.extendTo(-1);
1493
1494 CallInst *NewCall = nullptr;
1495 // Check whether to generate an unordered atomic memcpy:
1496 // If the load or store are atomic, then they must necessarily be unordered
1497 // by previous checks.
1498 if (!IsAtomic) {
1499 if (UseMemMove)
1500 NewCall = Builder.CreateMemMove(StoreBasePtr, StoreAlign, LoadBasePtr,
1501 LoadAlign, NumBytes,
1502 /*isVolatile=*/false, AATags);
1503 else
1504 NewCall =
1505 Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
1506 NumBytes, /*isVolatile=*/false, AATags);
1507 } else {
1508 // Create the call.
1509 // Note that unordered atomic loads/stores are *required* by the spec to
1510 // have an alignment but non-atomic loads/stores may not.
1511 NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1512 StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize,
1513 AATags);
1514 }
1515 NewCall->setDebugLoc(TheStore->getDebugLoc());
1516
1517 if (MSSAU) {
1518 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
1519 NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
1520 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
1521 }
1522
1523 LLVM_DEBUG(dbgs() << " Formed new call: " << *NewCall << "\n"
1524 << " from load ptr=" << *LoadEv << " at: " << *TheLoad
1525 << "\n"
1526 << " from store ptr=" << *StoreEv << " at: " << *TheStore
1527 << "\n");
1528
1529 ORE.emit([&]() {
1530 return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
1531 NewCall->getDebugLoc(), Preheader)
1532 << "Formed a call to "
1533 << ore::NV("NewFunction", NewCall->getCalledFunction())
1534 << "() intrinsic from " << ore::NV("Inst", InstRemark)
1535 << " instruction in " << ore::NV("Function", TheStore->getFunction())
1536 << " function"
1538 << ore::NV("FromBlock", TheStore->getParent()->getName())
1539 << ore::NV("ToBlock", Preheader->getName());
1540 });
1541
1542 // Okay, a new call to memcpy/memmove has been formed. Zap the original store
1543 // and anything that feeds into it.
1544 if (MSSAU)
1545 MSSAU->removeMemoryAccess(TheStore, true);
1546 deleteDeadInstruction(TheStore);
1547 if (MSSAU && VerifyMemorySSA)
1548 MSSAU->getMemorySSA()->verifyMemorySSA();
1549 if (UseMemMove)
1550 ++NumMemMove;
1551 else
1552 ++NumMemCpy;
1553 ExpCleaner.markResultUsed();
1554 return true;
1555}
1556
1557// When compiling for codesize we avoid idiom recognition for a multi-block loop
1558// unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
1559//
1560bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
1561 bool IsLoopMemset) {
1562 if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
1563 if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) {
1564 LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
1565 << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
1566 << " avoided: multi-block top-level loop\n");
1567 return true;
1568 }
1569 }
1570
1571 return false;
1572}
1573
1574bool LoopIdiomRecognize::optimizeCRCLoop(const PolynomialInfo &Info) {
1575 // FIXME: Hexagon has a special HexagonLoopIdiom that optimizes CRC using
1576 // carry-less multiplication instructions, which is more efficient than our
1577 // Sarwate table-lookup optimization. Hence, until we're able to emit
1578 // target-specific instructions for Hexagon, subsuming HexagonLoopIdiom,
1579 // disable the optimization for Hexagon.
1580 Module &M = *CurLoop->getHeader()->getModule();
1581 Triple TT(M.getTargetTriple());
1582 if (TT.getArch() == Triple::hexagon)
1583 return false;
1584
1585 // First, create a new GlobalVariable corresponding to the
1586 // Sarwate-lookup-table.
1587 Type *CRCTy = Info.LHS->getType();
1588 unsigned CRCBW = CRCTy->getIntegerBitWidth();
1589 std::array<Constant *, 256> CRCConstants;
1591 CRCConstants.begin(),
1592 [CRCTy](const APInt &E) { return ConstantInt::get(CRCTy, E); });
1593 Constant *ConstArray =
1594 ConstantArray::get(ArrayType::get(CRCTy, 256), CRCConstants);
1595 GlobalVariable *GV =
1596 new GlobalVariable(M, ConstArray->getType(), true,
1597 GlobalValue::PrivateLinkage, ConstArray, ".crctable");
1598
1601
1602 // Next, mark all PHIs for removal except IV.
1603 {
1604 for (PHINode &PN : CurLoop->getHeader()->phis()) {
1605 if (&PN == IV)
1606 continue;
1607 PN.replaceAllUsesWith(PoisonValue::get(PN.getType()));
1608 Cleanup.push_back(&PN);
1609 }
1610 }
1611
1612 // Next, fix up the trip count.
1613 {
1614 unsigned NewBTC = (Info.TripCount / 8) - 1;
1615 BasicBlock *LoopBlk = CurLoop->getLoopLatch();
1616 CondBrInst *BrInst = cast<CondBrInst>(LoopBlk->getTerminator());
1617 CmpPredicate ExitPred = BrInst->getSuccessor(0) == LoopBlk
1620 Instruction *ExitCond = CurLoop->getLatchCmpInst();
1621 Value *ExitLimit = ConstantInt::get(IV->getType(), NewBTC);
1622 IRBuilder<> Builder(ExitCond);
1623 Value *NewExitCond =
1624 Builder.CreateICmp(ExitPred, IV, ExitLimit, "exit.cond");
1625 ExitCond->replaceAllUsesWith(NewExitCond);
1626 deleteDeadInstruction(ExitCond);
1627 }
1628
1629 // Finally, fill the loop with the Sarwate-table-lookup logic, and replace all
1630 // uses of ComputedValue.
1631 //
1632 // Little-endian:
1633 // crc = (crc >> 8) ^ tbl[(iv'th byte of data) ^ (bottom byte of crc)]
1634 // Big-Endian:
1635 // crc = (crc << 8) ^ tbl[(iv'th byte of data) ^ (top byte of crc)]
1636 {
1637 auto LoByte = [](IRBuilderBase &Builder, Value *Op, const Twine &Name) {
1638 return Builder.CreateZExtOrTrunc(
1639 Op, IntegerType::getInt8Ty(Op->getContext()), Name);
1640 };
1641 auto HiIdx = [LoByte, CRCBW](IRBuilderBase &Builder, Value *Op,
1642 const Twine &Name) {
1643 Type *OpTy = Op->getType();
1644
1645 // When the bitwidth of the CRC mismatches the Op's bitwidth, we need to
1646 // use the CRC's bitwidth as the reference for shifting right.
1647 return LoByte(Builder,
1648 CRCBW > 8 ? Builder.CreateLShr(
1649 Op, ConstantInt::get(OpTy, CRCBW - 8), Name)
1650 : Op,
1651 Name + ".lo.byte");
1652 };
1653
1654 IRBuilder<> Builder(CurLoop->getHeader(),
1655 CurLoop->getHeader()->getFirstNonPHIIt());
1656
1657 // Create the CRC PHI, and initialize its incoming value to the initial
1658 // value of CRC.
1659 PHINode *CRCPhi = Builder.CreatePHI(CRCTy, 2, "crc");
1660 CRCPhi->addIncoming(Info.LHS, CurLoop->getLoopPreheader());
1661
1662 // CRC is now an evolving variable, initialized to the PHI.
1663 Value *CRC = CRCPhi;
1664
1665 // TableIndexer = ((top|bottom) byte of CRC). It is XOR'ed with (iv'th byte
1666 // of LHSAux), if LHSAux is non-nullptr.
1667 Value *Indexer = CRC;
1668 if (Value *Data = Info.LHSAux) {
1669 Type *DataTy = Data->getType();
1670
1671 // To index into the (iv'th byte of LHSAux), we multiply iv by 8, and we
1672 // shift right by that amount, and take the lo-byte (in the little-endian
1673 // case), or shift left by that amount, and take the hi-idx (in the
1674 // big-endian case).
1675 Value *IVBits = Builder.CreateZExtOrTrunc(
1676 Builder.CreateShl(IV, 3, "iv.bits"), DataTy, "iv.indexer");
1677 Value *DataIndexer =
1678 Info.IsBigEndian ? Builder.CreateShl(Data, IVBits, "data.indexer")
1679 : Builder.CreateLShr(Data, IVBits, "data.indexer");
1680 Indexer = Builder.CreateXor(
1681 DataIndexer,
1682 Builder.CreateZExtOrTrunc(Indexer, DataTy, "crc.indexer.cast"),
1683 "crc.data.indexer");
1684 }
1685
1686 Indexer = Info.IsBigEndian ? HiIdx(Builder, Indexer, "indexer.hi")
1687 : LoByte(Builder, Indexer, "indexer.lo");
1688
1689 // Always index into a GEP using the index type.
1690 Indexer = Builder.CreateZExt(
1691 Indexer, SE->getDataLayout().getIndexType(GV->getType()),
1692 "indexer.ext");
1693
1694 // CRCTableLd = CRCTable[(iv'th byte of data) ^ (top|bottom) byte of CRC].
1695 Value *CRCTableGEP =
1696 Builder.CreateInBoundsGEP(CRCTy, GV, Indexer, "tbl.ptradd");
1697 Value *CRCTableLd = Builder.CreateLoad(CRCTy, CRCTableGEP, "tbl.ld");
1698
1699 // CRCNext = (CRC (<<|>>) 8) ^ CRCTableLd, or simply CRCTableLd in case of
1700 // CRC-8.
1701 Value *CRCNext = CRCTableLd;
1702 if (CRCBW > 8) {
1703 Value *CRCShift = Info.IsBigEndian
1704 ? Builder.CreateShl(CRC, 8, "crc.be.shift")
1705 : Builder.CreateLShr(CRC, 8, "crc.le.shift");
1706 CRCNext = Builder.CreateXor(CRCShift, CRCTableLd, "crc.next");
1707 }
1708
1709 // Connect the back-edge for the loop, and RAUW the ComputedValue.
1710 CRCPhi->addIncoming(CRCNext, CurLoop->getLoopLatch());
1711 Info.ComputedValue->replaceUsesOutsideBlock(CRCNext,
1712 CurLoop->getLoopLatch());
1713 }
1714
1715 // Cleanup.
1716 {
1717 for (PHINode *PN : Cleanup)
1719 SE->forgetLoop(CurLoop);
1720 }
1721 return true;
1722}
1723
1724bool LoopIdiomRecognize::runOnNoncountableLoop() {
1725 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
1726 << CurLoop->getHeader()->getParent()->getName()
1727 << "] Noncountable Loop %"
1728 << CurLoop->getHeader()->getName() << "\n");
1729
1730 return recognizePopcount() || recognizeAndInsertFFS() ||
1731 recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
1732 recognizeShiftUntilLessThan() || recognizeAndInsertStrLen();
1733}
1734
1735/// Check if the given conditional branch is based on the comparison between
1736/// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
1737/// true), the control yields to the loop entry. If the branch matches the
1738/// behavior, the variable involved in the comparison is returned. This function
1739/// will be called to see if the precondition and postcondition of the loop are
1740/// in desirable form.
1742 bool JmpOnZero = false) {
1744 if (!Cond)
1745 return nullptr;
1746
1747 auto *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1748 if (!CmpZero || !CmpZero->isZero())
1749 return nullptr;
1750
1751 BasicBlock *TrueSucc = BI->getSuccessor(0);
1752 BasicBlock *FalseSucc = BI->getSuccessor(1);
1753 if (JmpOnZero)
1754 std::swap(TrueSucc, FalseSucc);
1755
1756 ICmpInst::Predicate Pred = Cond->getPredicate();
1757 if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
1758 (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
1759 return Cond->getOperand(0);
1760
1761 return nullptr;
1762}
1763
1764namespace {
1765
1766class StrlenVerifier {
1767public:
1768 explicit StrlenVerifier(const Loop *CurLoop, ScalarEvolution *SE,
1769 const TargetLibraryInfo *TLI)
1770 : CurLoop(CurLoop), SE(SE), TLI(TLI) {}
1771
1772 bool isValidStrlenIdiom() {
1773 // Give up if the loop has multiple blocks, multiple backedges, or
1774 // multiple exit blocks
1775 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 ||
1776 !CurLoop->getUniqueExitBlock())
1777 return false;
1778
1779 // It should have a preheader and a branch instruction.
1780 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1781 if (!Preheader ||
1783 return false;
1784
1785 // The loop exit must be conditioned on an icmp with 0 the null terminator.
1786 // The icmp operand has to be a load on some SSA reg that increments
1787 // by 1 in the loop.
1788 BasicBlock *LoopBody = *CurLoop->block_begin();
1789
1790 // Skip if the body is too big as it most likely is not a strlen idiom.
1791 if (!LoopBody || LoopBody->size() >= 15)
1792 return false;
1793
1794 CondBrInst *LoopTerm = dyn_cast<CondBrInst>(LoopBody->getTerminator());
1795 if (!LoopTerm)
1796 return false;
1797 Value *LoopCond = matchCondition(LoopTerm, LoopBody);
1798 if (!LoopCond)
1799 return false;
1800
1801 LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond);
1802 if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0)
1803 return false;
1804
1805 OperandType = LoopLoad->getType();
1806 if (!OperandType || !OperandType->isIntegerTy())
1807 return false;
1808
1809 // See if the pointer expression is an AddRec with constant step a of form
1810 // ({n,+,a}) where a is the width of the char type.
1811 Value *IncPtr = LoopLoad->getPointerOperand();
1812 const SCEV *LoadEv = SE->getSCEV(IncPtr);
1813 const APInt *Step;
1814 if (!match(LoadEv,
1815 m_scev_AffineAddRec(m_SCEV(LoadBaseEv), m_scev_APInt(Step))))
1816 return false;
1817
1818 LLVM_DEBUG(dbgs() << "pointer load scev: " << *LoadEv << "\n");
1819
1820 unsigned StepSize = Step->getZExtValue();
1821
1822 // Verify that StepSize is consistent with platform char width.
1823 OpWidth = OperandType->getIntegerBitWidth();
1824 unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule());
1825 if (OpWidth != StepSize * 8)
1826 return false;
1827 if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32)
1828 return false;
1829 if (OpWidth >= 16)
1830 if (OpWidth != WcharSize * 8)
1831 return false;
1832
1833 // Scan every instruction in the loop to ensure there are no side effects.
1834 for (Instruction &I : *LoopBody)
1835 if (I.mayHaveSideEffects())
1836 return false;
1837
1838 BasicBlock *LoopExitBB = CurLoop->getExitBlock();
1839 if (!LoopExitBB)
1840 return false;
1841
1842 for (PHINode &PN : LoopExitBB->phis()) {
1843 if (!SE->isSCEVable(PN.getType()))
1844 return false;
1845
1846 const SCEV *Ev = SE->getSCEV(&PN);
1847 if (!Ev)
1848 return false;
1849
1850 LLVM_DEBUG(dbgs() << "loop exit phi scev: " << *Ev << "\n");
1851
1852 // Since we verified that the loop trip count will be a valid strlen
1853 // idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use
1854 // SCEVExpander materialize the loop output.
1855 const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1856 if (!AddRecEv || !AddRecEv->isAffine())
1857 return false;
1858
1859 // We only want RecAddExpr with recurrence step that is constant. This
1860 // is good enough for all the idioms we want to recognize. Later we expand
1861 // and materialize the recurrence as {base,+,a} -> (base + a * strlen)
1862 if (!isa<SCEVConstant>(AddRecEv->getStepRecurrence(*SE)))
1863 return false;
1864 }
1865
1866 return true;
1867 }
1868
1869public:
1870 const Loop *CurLoop;
1871 ScalarEvolution *SE;
1872 const TargetLibraryInfo *TLI;
1873
1874 unsigned OpWidth;
1875 ConstantInt *StepSizeCI;
1876 const SCEV *LoadBaseEv;
1878};
1879
1880} // namespace
1881
1882/// The Strlen Idiom we are trying to detect has the following structure
1883///
1884/// preheader:
1885/// ...
1886/// br label %body, ...
1887///
1888/// body:
1889/// ... ; %0 is incremented by a gep
1890/// %1 = load i8, ptr %0, align 1
1891/// %2 = icmp eq i8 %1, 0
1892/// br i1 %2, label %exit, label %body
1893///
1894/// exit:
1895/// %lcssa = phi [%0, %body], ...
1896///
1897/// We expect the strlen idiom to have a load of a character type that
1898/// is compared against '\0', and such load pointer operand must have scev
1899/// expression of the form {%str,+,c} where c is a ConstantInt of the
1900/// appropiate character width for the idiom, and %str is the base of the string
1901/// And, that all lcssa phis have the form {...,+,n} where n is a constant,
1902///
1903/// When transforming the output of the strlen idiom, the lccsa phi are
1904/// expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen)
1905/// and all subsequent uses are replaced. For example,
1906///
1907/// \code{.c}
1908/// const char* base = str;
1909/// while (*str != '\0')
1910/// ++str;
1911/// size_t result = str - base;
1912/// \endcode
1913///
1914/// will be transformed as follows: The idiom will be replaced by a strlen
1915/// computation to compute the address of the null terminator of the string.
1916///
1917/// \code{.c}
1918/// const char* base = str;
1919/// const char* end = base + strlen(str);
1920/// size_t result = end - base;
1921/// \endcode
1922///
1923/// In the case we index by an induction variable, as long as the induction
1924/// variable has a constant int increment, we can replace all such indvars
1925/// with the closed form computation of strlen
1926///
1927/// \code{.c}
1928/// size_t i = 0;
1929/// while (str[i] != '\0')
1930/// ++i;
1931/// size_t result = i;
1932/// \endcode
1933///
1934/// Will be replaced by
1935///
1936/// \code{.c}
1937/// size_t i = 0 + strlen(str);
1938/// size_t result = i;
1939/// \endcode
1940///
1941bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
1942 if (DisableLIRP::All)
1943 return false;
1944
1945 StrlenVerifier Verifier(CurLoop, SE, TLI);
1946
1947 if (!Verifier.isValidStrlenIdiom())
1948 return false;
1949
1950 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1951 BasicBlock *LoopBody = *CurLoop->block_begin();
1952 BasicBlock *LoopExitBB = CurLoop->getExitBlock();
1953 CondBrInst *LoopTerm = cast<CondBrInst>(LoopBody->getTerminator());
1954 assert(Preheader && LoopBody && LoopExitBB &&
1955 "Should be verified to be valid by StrlenVerifier");
1956
1957 if (Verifier.OpWidth == 8) {
1959 return false;
1960 if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_strlen))
1961 return false;
1962 } else {
1964 return false;
1965 if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_wcslen))
1966 return false;
1967 }
1968
1969 IRBuilder<> Builder(Preheader->getTerminator());
1970 Builder.SetCurrentDebugLocation(CurLoop->getStartLoc());
1971 SCEVExpander Expander(*SE, "strlen_idiom");
1972 Value *MaterialzedBase = Expander.expandCodeFor(
1973 Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(),
1974 Builder.GetInsertPoint());
1975
1976 Value *StrLenFunc = nullptr;
1977 if (Verifier.OpWidth == 8) {
1978 StrLenFunc = emitStrLen(MaterialzedBase, Builder, *DL, TLI);
1979 } else {
1980 StrLenFunc = emitWcsLen(MaterialzedBase, Builder, *DL, TLI);
1981 }
1982 assert(StrLenFunc && "Failed to emit strlen function.");
1983
1984 const SCEV *StrlenEv = SE->getSCEV(StrLenFunc);
1986 for (PHINode &PN : LoopExitBB->phis()) {
1987 // We can now materialize the loop output as all phi have scev {base,+,a}.
1988 // We expand the phi as:
1989 // %strlen = call i64 @strlen(%str)
1990 // %phi.new = base expression + step * %strlen
1991 const SCEV *Ev = SE->getSCEV(&PN);
1992 const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1993 const SCEVConstant *Step =
1995 const SCEV *Base = AddRecEv->getStart();
1996
1997 // It is safe to truncate to base since if base is narrower than size_t
1998 // the equivalent user code will have to truncate anyways.
1999 const SCEV *NewEv = SE->getAddExpr(
2001 StrlenEv, Base->getType())));
2002
2003 Value *MaterializedPHI = Expander.expandCodeFor(NewEv, NewEv->getType(),
2004 Builder.GetInsertPoint());
2005 Expander.clear();
2006 PN.replaceAllUsesWith(MaterializedPHI);
2007 Cleanup.push_back(&PN);
2008 }
2009
2010 // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned
2011 // up by later passes
2012 for (PHINode *PN : Cleanup)
2014
2015 // LoopDeletion only delete invariant loops with known trip-count. We can
2016 // update the condition so it will reliablely delete the invariant loop
2017 assert((LoopTerm->getSuccessor(0) == LoopBody ||
2018 LoopTerm->getSuccessor(1) == LoopBody) &&
2019 "loop body must have a successor that is it self");
2020 ConstantInt *NewLoopCond = LoopTerm->getSuccessor(0) == LoopBody
2021 ? Builder.getFalse()
2022 : Builder.getTrue();
2023 LoopTerm->setCondition(NewLoopCond);
2024 SE->forgetLoop(CurLoop);
2025
2026 ++NumStrLen;
2027 LLVM_DEBUG(dbgs() << " Formed strlen idiom: " << *StrLenFunc << "\n");
2028 ORE.emit([&]() {
2029 return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen",
2030 CurLoop->getStartLoc(), Preheader)
2031 << "Transformed " << StrLenFunc->getName() << " loop idiom";
2032 });
2033
2034 return true;
2035}
2036
2037/// Check if the given conditional branch is based on an unsigned less-than
2038/// comparison between a variable and a constant, and if the comparison is false
2039/// the control yields to the loop entry. If the branch matches the behaviour,
2040/// the variable involved in the comparison is returned.
2042 APInt &Threshold) {
2044 if (!Cond)
2045 return nullptr;
2046
2047 ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));
2048 if (!CmpConst)
2049 return nullptr;
2050
2051 BasicBlock *FalseSucc = BI->getSuccessor(1);
2052 ICmpInst::Predicate Pred = Cond->getPredicate();
2053
2054 if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
2055 Threshold = CmpConst->getValue();
2056 return Cond->getOperand(0);
2057 }
2058
2059 return nullptr;
2060}
2061
2062// Check if the recurrence variable `VarX` is in the right form to create
2063// the idiom. Returns the value coerced to a PHINode if so.
2065 BasicBlock *LoopEntry) {
2066 auto *PhiX = dyn_cast<PHINode>(VarX);
2067 if (PhiX && PhiX->getParent() == LoopEntry &&
2068 (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
2069 return PhiX;
2070 return nullptr;
2071}
2072
2073/// Return true if the idiom is detected in the loop.
2074///
2075/// Additionally:
2076/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
2077/// or nullptr if there is no such.
2078/// 2) \p CntPhi is set to the corresponding phi node
2079/// or nullptr if there is no such.
2080/// 3) \p InitX is set to the value whose CTLZ could be used.
2081/// 4) \p DefX is set to the instruction calculating Loop exit condition.
2082/// 5) \p Threshold is set to the constant involved in the unsigned less-than
2083/// comparison.
2084///
2085/// The core idiom we are trying to detect is:
2086/// \code
2087/// if (x0 < 2)
2088/// goto loop-exit // the precondition of the loop
2089/// cnt0 = init-val
2090/// do {
2091/// x = phi (x0, x.next); //PhiX
2092/// cnt = phi (cnt0, cnt.next)
2093///
2094/// cnt.next = cnt + 1;
2095/// ...
2096/// x.next = x >> 1; // DefX
2097/// } while (x >= 4)
2098/// loop-exit:
2099/// \endcode
2101 Intrinsic::ID &IntrinID,
2102 Value *&InitX, Instruction *&CntInst,
2103 PHINode *&CntPhi, Instruction *&DefX,
2104 APInt &Threshold) {
2105 BasicBlock *LoopEntry;
2106
2107 DefX = nullptr;
2108 CntInst = nullptr;
2109 CntPhi = nullptr;
2110 LoopEntry = *(CurLoop->block_begin());
2111
2112 // step 1: Check if the loop-back branch is in desirable form.
2113 auto *EntryBI = dyn_cast<CondBrInst>(LoopEntry->getTerminator());
2114 if (!EntryBI)
2115 return false;
2116 if (Value *T = matchShiftULTCondition(EntryBI, LoopEntry, Threshold))
2117 DefX = dyn_cast<Instruction>(T);
2118 else
2119 return false;
2120
2121 // step 2: Check the recurrence of variable X
2122 if (!DefX || !isa<PHINode>(DefX))
2123 return false;
2124
2125 PHINode *VarPhi = cast<PHINode>(DefX);
2126 int Idx = VarPhi->getBasicBlockIndex(LoopEntry);
2127 if (Idx == -1)
2128 return false;
2129
2130 DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue(Idx));
2131 if (!DefX || DefX->getNumOperands() == 0 || DefX->getOperand(0) != VarPhi)
2132 return false;
2133
2134 // step 3: detect instructions corresponding to "x.next = x >> 1"
2135 if (DefX->getOpcode() != Instruction::LShr)
2136 return false;
2137
2138 IntrinID = Intrinsic::ctlz;
2140 if (!Shft || !Shft->isOne())
2141 return false;
2142
2143 InitX = VarPhi->getIncomingValueForBlock(CurLoop->getLoopPreheader());
2144
2145 // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
2146 // or cnt.next = cnt + -1.
2147 // TODO: We can skip the step. If loop trip count is known (CTLZ),
2148 // then all uses of "cnt.next" could be optimized to the trip count
2149 // plus "cnt0". Currently it is not optimized.
2150 // This step could be used to detect POPCNT instruction:
2151 // cnt.next = cnt + (x.next & 1)
2152 for (Instruction &Inst :
2153 llvm::make_range(LoopEntry->getFirstNonPHIIt(), LoopEntry->end())) {
2154 if (Inst.getOpcode() != Instruction::Add)
2155 continue;
2156
2158 if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
2159 continue;
2160
2161 PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
2162 if (!Phi)
2163 continue;
2164
2165 CntInst = &Inst;
2166 CntPhi = Phi;
2167 break;
2168 }
2169 if (!CntInst)
2170 return false;
2171
2172 return true;
2173}
2174
2175/// Return true iff the idiom is detected in the loop.
2176///
2177/// Additionally:
2178/// 1) \p CntInst is set to the instruction counting the population bit.
2179/// 2) \p CntPhi is set to the corresponding phi node.
2180/// 3) \p Var is set to the value whose population bits are being counted.
2181///
2182/// The core idiom we are trying to detect is:
2183/// \code
2184/// if (x0 != 0)
2185/// goto loop-exit // the precondition of the loop
2186/// cnt0 = init-val;
2187/// do {
2188/// x1 = phi (x0, x2);
2189/// cnt1 = phi(cnt0, cnt2);
2190///
2191/// cnt2 = cnt1 + 1;
2192/// ...
2193/// x2 = x1 & (x1 - 1);
2194/// ...
2195/// } while(x != 0);
2196///
2197/// loop-exit:
2198/// \endcode
2199static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
2200 Instruction *&CntInst, PHINode *&CntPhi,
2201 Value *&Var) {
2202 // step 1: Check to see if the look-back branch match this pattern:
2203 // "if (a!=0) goto loop-entry".
2204 BasicBlock *LoopEntry;
2205 Instruction *DefX2, *CountInst;
2206 Value *VarX1, *VarX0;
2207 PHINode *PhiX, *CountPhi;
2208
2209 DefX2 = CountInst = nullptr;
2210 VarX1 = VarX0 = nullptr;
2211 PhiX = CountPhi = nullptr;
2212 LoopEntry = *(CurLoop->block_begin());
2213
2214 // step 1: Check if the loop-back branch is in desirable form.
2215 {
2216 auto *LoopTerm = dyn_cast<CondBrInst>(LoopEntry->getTerminator());
2217 if (!LoopTerm)
2218 return false;
2219 DefX2 = dyn_cast_or_null<Instruction>(matchCondition(LoopTerm, LoopEntry));
2220 }
2221
2222 // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
2223 {
2224 if (!DefX2 || DefX2->getOpcode() != Instruction::And)
2225 return false;
2226
2227 BinaryOperator *SubOneOp;
2228
2229 if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
2230 VarX1 = DefX2->getOperand(1);
2231 else {
2232 VarX1 = DefX2->getOperand(0);
2233 SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
2234 }
2235 if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
2236 return false;
2237
2238 ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
2239 if (!Dec ||
2240 !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
2241 (SubOneOp->getOpcode() == Instruction::Add &&
2242 Dec->isMinusOne()))) {
2243 return false;
2244 }
2245 }
2246
2247 // step 3: Check the recurrence of variable X
2248 PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
2249 if (!PhiX)
2250 return false;
2251
2252 // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
2253 {
2254 CountInst = nullptr;
2255 for (Instruction &Inst :
2256 llvm::make_range(LoopEntry->getFirstNonPHIIt(), LoopEntry->end())) {
2257 if (Inst.getOpcode() != Instruction::Add)
2258 continue;
2259
2261 if (!Inc || !Inc->isOne())
2262 continue;
2263
2264 PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
2265 if (!Phi)
2266 continue;
2267
2268 // Check if the result of the instruction is live of the loop.
2269 bool LiveOutLoop = false;
2270 for (User *U : Inst.users()) {
2271 if ((cast<Instruction>(U))->getParent() != LoopEntry) {
2272 LiveOutLoop = true;
2273 break;
2274 }
2275 }
2276
2277 if (LiveOutLoop) {
2278 CountInst = &Inst;
2279 CountPhi = Phi;
2280 break;
2281 }
2282 }
2283
2284 if (!CountInst)
2285 return false;
2286 }
2287
2288 // step 5: check if the precondition is in this form:
2289 // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
2290 {
2291 auto *PreCondBr = dyn_cast<CondBrInst>(PreCondBB->getTerminator());
2292 if (!PreCondBr)
2293 return false;
2294 Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
2295 if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
2296 return false;
2297
2298 CntInst = CountInst;
2299 CntPhi = CountPhi;
2300 Var = T;
2301 }
2302
2303 return true;
2304}
2305
2306/// Return true if the idiom is detected in the loop.
2307///
2308/// Additionally:
2309/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
2310/// or nullptr if there is no such.
2311/// 2) \p CntPhi is set to the corresponding phi node
2312/// or nullptr if there is no such.
2313/// 3) \p Var is set to the value whose CTLZ could be used.
2314/// 4) \p DefX is set to the instruction calculating Loop exit condition.
2315///
2316/// The core idiom we are trying to detect is:
2317/// \code
2318/// if (x0 == 0)
2319/// goto loop-exit // the precondition of the loop
2320/// cnt0 = init-val;
2321/// do {
2322/// x = phi (x0, x.next); //PhiX
2323/// cnt = phi(cnt0, cnt.next);
2324///
2325/// cnt.next = cnt + 1;
2326/// ...
2327/// x.next = x >> 1; // DefX
2328/// ...
2329/// } while(x.next != 0);
2330///
2331/// loop-exit:
2332/// \endcode
2333static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
2334 Intrinsic::ID &IntrinID, Value *&InitX,
2335 Instruction *&CntInst, PHINode *&CntPhi,
2336 Instruction *&DefX) {
2337 BasicBlock *LoopEntry;
2338 Value *VarX = nullptr;
2339
2340 DefX = nullptr;
2341 CntInst = nullptr;
2342 CntPhi = nullptr;
2343 LoopEntry = *(CurLoop->block_begin());
2344
2345 // step 1: Check if the loop-back branch is in desirable form.
2346 auto *LoopTerm = dyn_cast<CondBrInst>(LoopEntry->getTerminator());
2347 if (!LoopTerm)
2348 return false;
2349 DefX = dyn_cast_or_null<Instruction>(matchCondition(LoopTerm, LoopEntry));
2350
2351 // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
2352 if (!DefX || !DefX->isShift())
2353 return false;
2354 IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
2355 Intrinsic::ctlz;
2357 if (!Shft || !Shft->isOne())
2358 return false;
2359 VarX = DefX->getOperand(0);
2360
2361 // step 3: Check the recurrence of variable X
2362 PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
2363 if (!PhiX)
2364 return false;
2365
2366 InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
2367
2368 // Make sure the initial value can't be negative otherwise the ashr in the
2369 // loop might never reach zero which would make the loop infinite.
2370 if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
2371 return false;
2372
2373 // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
2374 // or cnt.next = cnt + -1.
2375 // TODO: We can skip the step. If loop trip count is known (CTLZ),
2376 // then all uses of "cnt.next" could be optimized to the trip count
2377 // plus "cnt0". Currently it is not optimized.
2378 // This step could be used to detect POPCNT instruction:
2379 // cnt.next = cnt + (x.next & 1)
2380 for (Instruction &Inst :
2381 llvm::make_range(LoopEntry->getFirstNonPHIIt(), LoopEntry->end())) {
2382 if (Inst.getOpcode() != Instruction::Add)
2383 continue;
2384
2386 if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
2387 continue;
2388
2389 PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
2390 if (!Phi)
2391 continue;
2392
2393 CntInst = &Inst;
2394 CntPhi = Phi;
2395 break;
2396 }
2397 if (!CntInst)
2398 return false;
2399
2400 return true;
2401}
2402
2403// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
2404// profitable if we delete the loop.
2405bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
2406 Value *InitX, bool ZeroCheck,
2407 size_t CanonicalSize) {
2408 const Value *Args[] = {InitX,
2409 ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
2410
2411 uint32_t HeaderSize = CurLoop->getHeader()->size();
2412
2413 IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
2414 InstructionCost Cost = TTI->getIntrinsicInstrCost(
2416 if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
2417 return false;
2418
2419 return true;
2420}
2421
2422/// Convert CTLZ / CTTZ idiom loop into countable loop.
2423/// If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
2424/// returns false.
2425bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
2426 Value *InitX, Instruction *DefX,
2427 PHINode *CntPhi,
2428 Instruction *CntInst) {
2429 bool IsCntPhiUsedOutsideLoop = false;
2430 for (User *U : CntPhi->users())
2431 if (!CurLoop->contains(cast<Instruction>(U))) {
2432 IsCntPhiUsedOutsideLoop = true;
2433 break;
2434 }
2435 bool IsCntInstUsedOutsideLoop = false;
2436 for (User *U : CntInst->users())
2437 if (!CurLoop->contains(cast<Instruction>(U))) {
2438 IsCntInstUsedOutsideLoop = true;
2439 break;
2440 }
2441 // If both CntInst and CntPhi are used outside the loop the profitability
2442 // is questionable.
2443 if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
2444 return false;
2445
2446 // For some CPUs result of CTLZ(X) intrinsic is undefined
2447 // when X is 0. If we can not guarantee X != 0, we need to check this
2448 // when expand.
2449 bool ZeroCheck = false;
2450 // It is safe to assume Preheader exist as it was checked in
2451 // parent function RunOnLoop.
2452 BasicBlock *PH = CurLoop->getLoopPreheader();
2453
2454 // If we are using the count instruction outside the loop, make sure we
2455 // have a zero check as a precondition. Without the check the loop would run
2456 // one iteration for before any check of the input value. This means 0 and 1
2457 // would have identical behavior in the original loop and thus
2458 if (!IsCntPhiUsedOutsideLoop) {
2459 auto *PreCondBB = PH->getSinglePredecessor();
2460 if (!PreCondBB)
2461 return false;
2462 auto *PreCondBI = dyn_cast<CondBrInst>(PreCondBB->getTerminator());
2463 if (!PreCondBI)
2464 return false;
2465 if (matchCondition(PreCondBI, PH) != InitX)
2466 return false;
2467 ZeroCheck = true;
2468 }
2469
2470 // FFS idiom loop has only 6 instructions:
2471 // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
2472 // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
2473 // %shr = ashr %n.addr.0, 1
2474 // %tobool = icmp eq %shr, 0
2475 // %inc = add nsw %i.0, 1
2476 // br i1 %tobool
2477 size_t IdiomCanonicalSize = 6;
2478 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
2479 return false;
2480
2481 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
2482 DefX->getDebugLoc(), ZeroCheck,
2483 IsCntPhiUsedOutsideLoop);
2484 return true;
2485}
2486
2487/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
2488/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
2489/// trip count returns true; otherwise, returns false.
2490bool LoopIdiomRecognize::recognizeAndInsertFFS() {
2491 // Give up if the loop has multiple blocks or multiple backedges.
2492 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
2493 return false;
2494
2495 Intrinsic::ID IntrinID;
2496 Value *InitX;
2497 Instruction *DefX = nullptr;
2498 PHINode *CntPhi = nullptr;
2499 Instruction *CntInst = nullptr;
2500
2501 if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
2502 DefX))
2503 return false;
2504
2505 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2506}
2507
2508bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
2509 // Give up if the loop has multiple blocks or multiple backedges.
2510 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
2511 return false;
2512
2513 Intrinsic::ID IntrinID;
2514 Value *InitX;
2515 Instruction *DefX = nullptr;
2516 PHINode *CntPhi = nullptr;
2517 Instruction *CntInst = nullptr;
2518
2519 APInt LoopThreshold;
2520 if (!detectShiftUntilLessThanIdiom(CurLoop, *DL, IntrinID, InitX, CntInst,
2521 CntPhi, DefX, LoopThreshold))
2522 return false;
2523
2524 if (LoopThreshold == 2) {
2525 // Treat as regular FFS.
2526 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2527 }
2528
2529 // Look for Floor Log2 Idiom.
2530 if (LoopThreshold != 4)
2531 return false;
2532
2533 // Abort if CntPhi is used outside of the loop.
2534 for (User *U : CntPhi->users())
2535 if (!CurLoop->contains(cast<Instruction>(U)))
2536 return false;
2537
2538 // It is safe to assume Preheader exist as it was checked in
2539 // parent function RunOnLoop.
2540 BasicBlock *PH = CurLoop->getLoopPreheader();
2541 auto *PreCondBB = PH->getSinglePredecessor();
2542 if (!PreCondBB)
2543 return false;
2544 auto *PreCondBI = dyn_cast<CondBrInst>(PreCondBB->getTerminator());
2545 if (!PreCondBI)
2546 return false;
2547
2548 APInt PreLoopThreshold;
2549 if (matchShiftULTCondition(PreCondBI, PH, PreLoopThreshold) != InitX ||
2550 PreLoopThreshold != 2)
2551 return false;
2552
2553 bool ZeroCheck = true;
2554
2555 // the loop has only 6 instructions:
2556 // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
2557 // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
2558 // %shr = ashr %n.addr.0, 1
2559 // %tobool = icmp ult %n.addr.0, C
2560 // %inc = add nsw %i.0, 1
2561 // br i1 %tobool
2562 size_t IdiomCanonicalSize = 6;
2563 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
2564 return false;
2565
2566 // log2(x) = w − 1 − clz(x)
2567 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
2568 DefX->getDebugLoc(), ZeroCheck,
2569 /*IsCntPhiUsedOutsideLoop=*/false,
2570 /*InsertSub=*/true);
2571 return true;
2572}
2573
2574/// Recognizes a population count idiom in a non-countable loop.
2575///
2576/// If detected, transforms the relevant code to issue the popcount intrinsic
2577/// function call, and returns true; otherwise, returns false.
2578bool LoopIdiomRecognize::recognizePopcount() {
2579 if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
2580 return false;
2581
2582 // Counting population are usually conducted by few arithmetic instructions.
2583 // Such instructions can be easily "absorbed" by vacant slots in a
2584 // non-compact loop. Therefore, recognizing popcount idiom only makes sense
2585 // in a compact loop.
2586
2587 // Give up if the loop has multiple blocks or multiple backedges.
2588 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
2589 return false;
2590
2591 BasicBlock *LoopBody = *(CurLoop->block_begin());
2592 if (LoopBody->size() >= 20) {
2593 // The loop is too big, bail out.
2594 return false;
2595 }
2596
2597 // It should have a preheader containing nothing but an unconditional branch.
2598 BasicBlock *PH = CurLoop->getLoopPreheader();
2599 if (!PH || &PH->front() != PH->getTerminator())
2600 return false;
2601 auto *EntryBI = dyn_cast<UncondBrInst>(PH->getTerminator());
2602 if (!EntryBI)
2603 return false;
2604
2605 // It should have a precondition block where the generated popcount intrinsic
2606 // function can be inserted.
2607 auto *PreCondBB = PH->getSinglePredecessor();
2608 if (!PreCondBB)
2609 return false;
2610 auto *PreCondBI = dyn_cast<CondBrInst>(PreCondBB->getTerminator());
2611 if (!PreCondBI)
2612 return false;
2613
2614 Instruction *CntInst;
2615 PHINode *CntPhi;
2616 Value *Val;
2617 if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
2618 return false;
2619
2620 transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
2621 return true;
2622}
2623
2625 const DebugLoc &DL) {
2626 Value *Ops[] = {Val};
2627 Type *Tys[] = {Val->getType()};
2628
2630 return IRBuilder.CreateIntrinsic(Intrinsic::ctpop, Tys, Ops);
2631}
2632
2634 const DebugLoc &DL, bool ZeroCheck,
2635 Intrinsic::ID IID) {
2636 Value *Ops[] = {Val, IRBuilder.getInt1(ZeroCheck)};
2637 Type *Tys[] = {Val->getType()};
2638
2640 return IRBuilder.CreateIntrinsic(IID, Tys, Ops);
2641}
2642
2643/// Transform the following loop (Using CTLZ, CTTZ is similar):
2644/// loop:
2645/// CntPhi = PHI [Cnt0, CntInst]
2646/// PhiX = PHI [InitX, DefX]
2647/// CntInst = CntPhi + 1
2648/// DefX = PhiX >> 1
2649/// LOOP_BODY
2650/// Br: loop if (DefX != 0)
2651/// Use(CntPhi) or Use(CntInst)
2652///
2653/// Into:
2654/// If CntPhi used outside the loop:
2655/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
2656/// Count = CountPrev + 1
2657/// else
2658/// Count = BitWidth(InitX) - CTLZ(InitX)
2659/// loop:
2660/// CntPhi = PHI [Cnt0, CntInst]
2661/// PhiX = PHI [InitX, DefX]
2662/// PhiCount = PHI [Count, Dec]
2663/// CntInst = CntPhi + 1
2664/// DefX = PhiX >> 1
2665/// Dec = PhiCount - 1
2666/// LOOP_BODY
2667/// Br: loop if (Dec != 0)
2668/// Use(CountPrev + Cnt0) // Use(CntPhi)
2669/// or
2670/// Use(Count + Cnt0) // Use(CntInst)
2671///
2672/// If LOOP_BODY is empty the loop will be deleted.
2673/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
2674void LoopIdiomRecognize::transformLoopToCountable(
2675 Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
2676 PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
2677 bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {
2678 // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
2679 IRBuilder<> Builder(Preheader->getTerminator());
2680 Builder.SetCurrentDebugLocation(DL);
2681
2682 // If there are no uses of CntPhi crate:
2683 // Count = BitWidth - CTLZ(InitX);
2684 // NewCount = Count;
2685 // If there are uses of CntPhi create:
2686 // NewCount = BitWidth - CTLZ(InitX >> 1);
2687 // Count = NewCount + 1;
2688 Value *InitXNext;
2689 if (IsCntPhiUsedOutsideLoop) {
2690 if (DefX->getOpcode() == Instruction::AShr)
2691 InitXNext = Builder.CreateAShr(InitX, 1);
2692 else if (DefX->getOpcode() == Instruction::LShr)
2693 InitXNext = Builder.CreateLShr(InitX, 1);
2694 else if (DefX->getOpcode() == Instruction::Shl) // cttz
2695 InitXNext = Builder.CreateShl(InitX, 1);
2696 else
2697 llvm_unreachable("Unexpected opcode!");
2698 } else
2699 InitXNext = InitX;
2700 Value *Count =
2701 createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
2702 Type *CountTy = Count->getType();
2703 Count = Builder.CreateSub(
2704 ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
2705 if (InsertSub)
2706 Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
2707 Value *NewCount = Count;
2708 if (IsCntPhiUsedOutsideLoop)
2709 Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
2710
2711 NewCount = Builder.CreateZExtOrTrunc(NewCount, CntInst->getType());
2712
2713 Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
2714 if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
2715 // If the counter was being incremented in the loop, add NewCount to the
2716 // counter's initial value, but only if the initial value is not zero.
2717 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2718 if (!InitConst || !InitConst->isZero())
2719 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2720 } else {
2721 // If the count was being decremented in the loop, subtract NewCount from
2722 // the counter's initial value.
2723 NewCount = Builder.CreateSub(CntInitVal, NewCount);
2724 }
2725
2726 // Step 2: Insert new IV and loop condition:
2727 // loop:
2728 // ...
2729 // PhiCount = PHI [Count, Dec]
2730 // ...
2731 // Dec = PhiCount - 1
2732 // ...
2733 // Br: loop if (Dec != 0)
2734 BasicBlock *Body = *(CurLoop->block_begin());
2735 auto *LbBr = cast<CondBrInst>(Body->getTerminator());
2736 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2737
2738 PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi");
2739 TcPhi->insertBefore(Body->begin());
2740
2741 Builder.SetInsertPoint(LbCond);
2742 Instruction *TcDec = cast<Instruction>(Builder.CreateSub(
2743 TcPhi, ConstantInt::get(CountTy, 1), "tcdec", false, true));
2744
2745 TcPhi->addIncoming(Count, Preheader);
2746 TcPhi->addIncoming(TcDec, Body);
2747
2748 CmpInst::Predicate Pred =
2749 (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
2750 LbCond->setPredicate(Pred);
2751 LbCond->setOperand(0, TcDec);
2752 LbCond->setOperand(1, ConstantInt::get(CountTy, 0));
2753
2754 // Step 3: All the references to the original counter outside
2755 // the loop are replaced with the NewCount
2756 if (IsCntPhiUsedOutsideLoop)
2757 CntPhi->replaceUsesOutsideBlock(NewCount, Body);
2758 else
2759 CntInst->replaceUsesOutsideBlock(NewCount, Body);
2760
2761 // step 4: Forget the "non-computable" trip-count SCEV associated with the
2762 // loop. The loop would otherwise not be deleted even if it becomes empty.
2763 SE->forgetLoop(CurLoop);
2764}
2765
2766void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
2767 Instruction *CntInst,
2768 PHINode *CntPhi, Value *Var) {
2769 BasicBlock *PreHead = CurLoop->getLoopPreheader();
2770 auto *PreCondBr = cast<CondBrInst>(PreCondBB->getTerminator());
2771 const DebugLoc &DL = CntInst->getDebugLoc();
2772
2773 // Assuming before transformation, the loop is following:
2774 // if (x) // the precondition
2775 // do { cnt++; x &= x - 1; } while(x);
2776
2777 // Step 1: Insert the ctpop instruction at the end of the precondition block
2778 IRBuilder<> Builder(PreCondBr);
2779 Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
2780 {
2781 PopCnt = createPopcntIntrinsic(Builder, Var, DL);
2782 NewCount = PopCntZext =
2783 Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
2784
2785 if (NewCount != PopCnt)
2786 (cast<Instruction>(NewCount))->setDebugLoc(DL);
2787
2788 // TripCnt is exactly the number of iterations the loop has
2789 TripCnt = NewCount;
2790
2791 // If the population counter's initial value is not zero, insert Add Inst.
2792 Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
2793 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
2794 if (!InitConst || !InitConst->isZero()) {
2795 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
2796 (cast<Instruction>(NewCount))->setDebugLoc(DL);
2797 }
2798 }
2799
2800 // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
2801 // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
2802 // function would be partial dead code, and downstream passes will drag
2803 // it back from the precondition block to the preheader.
2804 {
2805 ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
2806
2807 Value *Opnd0 = PopCntZext;
2808 Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
2809 if (PreCond->getOperand(0) != Var)
2810 std::swap(Opnd0, Opnd1);
2811
2812 ICmpInst *NewPreCond = cast<ICmpInst>(
2813 Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
2814 PreCondBr->setCondition(NewPreCond);
2815
2817 }
2818
2819 // Step 3: Note that the population count is exactly the trip count of the
2820 // loop in question, which enable us to convert the loop from noncountable
2821 // loop into a countable one. The benefit is twofold:
2822 //
2823 // - If the loop only counts population, the entire loop becomes dead after
2824 // the transformation. It is a lot easier to prove a countable loop dead
2825 // than to prove a noncountable one. (In some C dialects, an infinite loop
2826 // isn't dead even if it computes nothing useful. In general, DCE needs
2827 // to prove a noncountable loop finite before safely delete it.)
2828 //
2829 // - If the loop also performs something else, it remains alive.
2830 // Since it is transformed to countable form, it can be aggressively
2831 // optimized by some optimizations which are in general not applicable
2832 // to a noncountable loop.
2833 //
2834 // After this step, this loop (conceptually) would look like following:
2835 // newcnt = __builtin_ctpop(x);
2836 // t = newcnt;
2837 // if (x)
2838 // do { cnt++; x &= x-1; t--) } while (t > 0);
2839 BasicBlock *Body = *(CurLoop->block_begin());
2840 {
2841 auto *LbBr = cast<CondBrInst>(Body->getTerminator());
2842 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
2843 Type *Ty = TripCnt->getType();
2844
2845 PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi");
2846 TcPhi->insertBefore(Body->begin());
2847
2848 Builder.SetInsertPoint(LbCond);
2850 Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
2851 "tcdec", false, true));
2852
2853 TcPhi->addIncoming(TripCnt, PreHead);
2854 TcPhi->addIncoming(TcDec, Body);
2855
2856 CmpInst::Predicate Pred =
2857 (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
2858 LbCond->setPredicate(Pred);
2859 LbCond->setOperand(0, TcDec);
2860 LbCond->setOperand(1, ConstantInt::get(Ty, 0));
2861 }
2862
2863 // Step 4: All the references to the original population counter outside
2864 // the loop are replaced with the NewCount -- the value returned from
2865 // __builtin_ctpop().
2866 CntInst->replaceUsesOutsideBlock(NewCount, Body);
2867
2868 // step 5: Forget the "non-computable" trip-count SCEV associated with the
2869 // loop. The loop would otherwise not be deleted even if it becomes empty.
2870 SE->forgetLoop(CurLoop);
2871}
2872
2873/// Match loop-invariant value.
2874template <typename SubPattern_t> struct match_LoopInvariant {
2875 SubPattern_t SubPattern;
2876 const Loop *L;
2877
2878 match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
2879 : SubPattern(SP), L(L) {}
2880
2881 template <typename ITy> bool match(ITy *V) const {
2882 return L->isLoopInvariant(V) && SubPattern.match(V);
2883 }
2884};
2885
2886/// Matches if the value is loop-invariant.
2887template <typename Ty>
2888inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
2889 return match_LoopInvariant<Ty>(M, L);
2890}
2891
2892/// Return true if the idiom is detected in the loop.
2893///
2894/// The core idiom we are trying to detect is:
2895/// \code
2896/// entry:
2897/// <...>
2898/// %bitmask = shl i32 1, %bitpos
2899/// br label %loop
2900///
2901/// loop:
2902/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
2903/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
2904/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
2905/// %x.next = shl i32 %x.curr, 1
2906/// <...>
2907/// br i1 %x.curr.isbitunset, label %loop, label %end
2908///
2909/// end:
2910/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
2911/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
2912/// <...>
2913/// \endcode
2914static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
2915 Value *&BitMask, Value *&BitPos,
2916 Value *&CurrX, Instruction *&NextX) {
2918 " Performing shift-until-bittest idiom detection.\n");
2919
2920 // Give up if the loop has multiple blocks or multiple backedges.
2921 if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
2922 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
2923 return false;
2924 }
2925
2926 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2927 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2928 assert(LoopPreheaderBB && "There is always a loop preheader.");
2929
2930 using namespace PatternMatch;
2931
2932 // Step 1: Check if the loop backedge is in desirable form.
2933
2934 CmpPredicate Pred;
2935 Value *CmpLHS, *CmpRHS;
2936 BasicBlock *TrueBB, *FalseBB;
2937 if (!match(LoopHeaderBB->getTerminator(),
2938 m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
2939 m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) {
2940 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
2941 return false;
2942 }
2943
2944 // Step 2: Check if the backedge's condition is in desirable form.
2945
2946 auto MatchVariableBitMask = [&]() {
2947 return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
2948 match(CmpLHS,
2949 m_c_And(m_Value(CurrX),
2951 m_Value(BitMask),
2952 m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
2953 CurLoop))));
2954 };
2955
2956 auto MatchDecomposableConstantBitMask = [&]() {
2957 auto Res = llvm::decomposeBitTestICmp(
2958 CmpLHS, CmpRHS, Pred, /*LookThroughTrunc=*/true,
2959 /*AllowNonZeroC=*/false, /*DecomposeAnd=*/true);
2960 if (Res && Res->Mask.isPowerOf2()) {
2961 assert(ICmpInst::isEquality(Res->Pred));
2962 Pred = Res->Pred;
2963 CurrX = Res->X;
2964 BitMask = ConstantInt::get(CurrX->getType(), Res->Mask);
2965 BitPos = ConstantInt::get(CurrX->getType(), Res->Mask.logBase2());
2966 return true;
2967 }
2968 return false;
2969 };
2970
2971 if (!MatchVariableBitMask() && !MatchDecomposableConstantBitMask()) {
2972 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n");
2973 return false;
2974 }
2975
2976 // Step 3: Check if the recurrence is in desirable form.
2977 auto *CurrXPN = dyn_cast<PHINode>(CurrX);
2978 if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
2979 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
2980 return false;
2981 }
2982
2983 BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
2984 NextX =
2985 dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
2986
2987 assert(CurLoop->isLoopInvariant(BaseX) &&
2988 "Expected BaseX to be available in the preheader!");
2989
2990 if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
2991 // FIXME: support right-shift?
2992 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
2993 return false;
2994 }
2995
2996 // Step 4: Check if the backedge's destinations are in desirable form.
2997
2999 "Should only get equality predicates here.");
3000
3001 // cmp-br is commutative, so canonicalize to a single variant.
3002 if (Pred != ICmpInst::Predicate::ICMP_EQ) {
3003 Pred = ICmpInst::getInversePredicate(Pred);
3004 std::swap(TrueBB, FalseBB);
3005 }
3006
3007 // We expect to exit loop when comparison yields false,
3008 // so when it yields true we should branch back to loop header.
3009 if (TrueBB != LoopHeaderBB) {
3010 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
3011 return false;
3012 }
3013
3014 // Okay, idiom checks out.
3015 return true;
3016}
3017
3018/// Look for the following loop:
3019/// \code
3020/// entry:
3021/// <...>
3022/// %bitmask = shl i32 1, %bitpos
3023/// br label %loop
3024///
3025/// loop:
3026/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
3027/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
3028/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
3029/// %x.next = shl i32 %x.curr, 1
3030/// <...>
3031/// br i1 %x.curr.isbitunset, label %loop, label %end
3032///
3033/// end:
3034/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
3035/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
3036/// <...>
3037/// \endcode
3038///
3039/// And transform it into:
3040/// \code
3041/// entry:
3042/// %bitmask = shl i32 1, %bitpos
3043/// %lowbitmask = add i32 %bitmask, -1
3044/// %mask = or i32 %lowbitmask, %bitmask
3045/// %x.masked = and i32 %x, %mask
3046/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked,
3047/// i1 true)
3048/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros
3049/// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1
3050/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos
3051/// %tripcount = add i32 %backedgetakencount, 1
3052/// %x.curr = shl i32 %x, %backedgetakencount
3053/// %x.next = shl i32 %x, %tripcount
3054/// br label %loop
3055///
3056/// loop:
3057/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ]
3058/// %loop.iv.next = add nuw i32 %loop.iv, 1
3059/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount
3060/// <...>
3061/// br i1 %loop.ivcheck, label %end, label %loop
3062///
3063/// end:
3064/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
3065/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
3066/// <...>
3067/// \endcode
3068bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
3069 bool MadeChange = false;
3070
3071 Value *X, *BitMask, *BitPos, *XCurr;
3072 Instruction *XNext;
3073 if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
3074 XNext)) {
3076 " shift-until-bittest idiom detection failed.\n");
3077 return MadeChange;
3078 }
3079 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n");
3080
3081 // Ok, it is the idiom we were looking for, we *could* transform this loop,
3082 // but is it profitable to transform?
3083
3084 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
3085 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
3086 assert(LoopPreheaderBB && "There is always a loop preheader.");
3087
3088 BasicBlock *SuccessorBB = CurLoop->getExitBlock();
3089 assert(SuccessorBB && "There is only a single successor.");
3090
3091 IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
3092 Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());
3093
3094 Intrinsic::ID IntrID = Intrinsic::ctlz;
3095 Type *Ty = X->getType();
3096 unsigned Bitwidth = Ty->getScalarSizeInBits();
3097
3100
3101 // The rewrite is considered to be unprofitable iff and only iff the
3102 // intrinsic/shift we'll use are not cheap. Note that we are okay with *just*
3103 // making the loop countable, even if nothing else changes.
3105 IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getTrue()});
3106 InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
3109 " Intrinsic is too costly, not beneficial\n");
3110 return MadeChange;
3111 }
3112 if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) >
3114 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n");
3115 return MadeChange;
3116 }
3117
3118 // Ok, transform appears worthwhile.
3119 MadeChange = true;
3120
3121 if (!isGuaranteedNotToBeUndefOrPoison(BitPos)) {
3122 // BitMask may be computed from BitPos, Freeze BitPos so we can increase
3123 // it's use count.
3124 std::optional<BasicBlock::iterator> InsertPt = std::nullopt;
3125 if (auto *BitPosI = dyn_cast<Instruction>(BitPos))
3126 InsertPt = BitPosI->getInsertionPointAfterDef();
3127 else
3128 InsertPt = DT->getRoot()->getFirstNonPHIOrDbgOrAlloca();
3129 if (!InsertPt)
3130 return false;
3131 FreezeInst *BitPosFrozen =
3132 new FreezeInst(BitPos, BitPos->getName() + ".fr", *InsertPt);
3133 BitPos->replaceUsesWithIf(BitPosFrozen, [BitPosFrozen](Use &U) {
3134 return U.getUser() != BitPosFrozen;
3135 });
3136 BitPos = BitPosFrozen;
3137 }
3138
3139 // Step 1: Compute the loop trip count.
3140
3141 Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
3142 BitPos->getName() + ".lowbitmask");
3143 Value *Mask =
3144 Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
3145 Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
3146 Value *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
3147 IntrID, Ty, {XMasked, /*is_zero_poison=*/Builder.getTrue()},
3148 /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
3149 Value *XMaskedNumActiveBits = Builder.CreateSub(
3150 ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
3151 XMasked->getName() + ".numactivebits", /*HasNUW=*/true,
3152 /*HasNSW=*/Bitwidth != 2);
3153 Value *XMaskedLeadingOnePos =
3154 Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
3155 XMasked->getName() + ".leadingonepos", /*HasNUW=*/false,
3156 /*HasNSW=*/Bitwidth > 2);
3157
3158 Value *LoopBackedgeTakenCount = Builder.CreateSub(
3159 BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount",
3160 /*HasNUW=*/true, /*HasNSW=*/true);
3161 // We know loop's backedge-taken count, but what's loop's trip count?
3162 // Note that while NUW is always safe, while NSW is only for bitwidths != 2.
3163 Value *LoopTripCount =
3164 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
3165 CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
3166 /*HasNSW=*/Bitwidth != 2);
3167
3168 // Step 2: Compute the recurrence's final value without a loop.
3169
3170 // NewX is always safe to compute, because `LoopBackedgeTakenCount`
3171 // will always be smaller than `bitwidth(X)`, i.e. we never get poison.
3172 Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
3173 NewX->takeName(XCurr);
3174 if (auto *I = dyn_cast<Instruction>(NewX))
3175 I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
3176
3177 Value *NewXNext;
3178 // Rewriting XNext is more complicated, however, because `X << LoopTripCount`
3179 // will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
3180 // iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
3181 // that isn't the case, we'll need to emit an alternative, safe IR.
3182 if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() ||
3186 Ty->getScalarSizeInBits() - 1))))
3187 NewXNext = Builder.CreateShl(X, LoopTripCount);
3188 else {
3189 // Otherwise, just additionally shift by one. It's the smallest solution,
3190 // alternatively, we could check that NewX is INT_MIN (or BitPos is )
3191 // and select 0 instead.
3192 NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
3193 }
3194
3195 NewXNext->takeName(XNext);
3196 if (auto *I = dyn_cast<Instruction>(NewXNext))
3197 I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
3198
3199 // Step 3: Adjust the successor basic block to receive the computed
3200 // recurrence's final value instead of the recurrence itself.
3201
3202 XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB);
3203 XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB);
3204
3205 // Step 4: Rewrite the loop into a countable form, with canonical IV.
3206
3207 // The new canonical induction variable.
3208 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());
3209 auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
3210
3211 // The induction itself.
3212 // Note that while NUW is always safe, while NSW is only for bitwidths != 2.
3213 Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
3214 auto *IVNext =
3215 Builder.CreateAdd(IV, ConstantInt::get(Ty, 1), IV->getName() + ".next",
3216 /*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
3217
3218 // The loop trip count check.
3219 auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
3220 CurLoop->getName() + ".ivcheck");
3221 SmallVector<uint32_t> BranchWeights;
3222 const bool HasBranchWeights =
3224 extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
3225
3226 auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
3227 if (HasBranchWeights) {
3228 if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
3229 std::swap(BranchWeights[0], BranchWeights[1]);
3230 // We're not changing the loop profile, so we can reuse the original loop's
3231 // profile.
3232 setBranchWeights(*BI, BranchWeights,
3233 /*IsExpected=*/false);
3234 }
3235
3236 LoopHeaderBB->getTerminator()->eraseFromParent();
3237
3238 // Populate the IV PHI.
3239 IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
3240 IV->addIncoming(IVNext, LoopHeaderBB);
3241
3242 // Step 5: Forget the "non-computable" trip-count SCEV associated with the
3243 // loop. The loop would otherwise not be deleted even if it becomes empty.
3244
3245 SE->forgetLoop(CurLoop);
3246
3247 // Other passes will take care of actually deleting the loop if possible.
3248
3249 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n");
3250
3251 ++NumShiftUntilBitTest;
3252 return MadeChange;
3253}
3254
3255/// Return true if the idiom is detected in the loop.
3256///
3257/// The core idiom we are trying to detect is:
3258/// \code
3259/// entry:
3260/// <...>
3261/// %start = <...>
3262/// %extraoffset = <...>
3263/// <...>
3264/// br label %for.cond
3265///
3266/// loop:
3267/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
3268/// %nbits = add nsw i8 %iv, %extraoffset
3269/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
3270/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
3271/// %iv.next = add i8 %iv, 1
3272/// <...>
3273/// br i1 %val.shifted.iszero, label %end, label %loop
3274///
3275/// end:
3276/// %iv.res = phi i8 [ %iv, %loop ] <...>
3277/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
3278/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
3279/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
3280/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
3281/// <...>
3282/// \endcode
3284 Instruction *&ValShiftedIsZero,
3285 Intrinsic::ID &IntrinID, Instruction *&IV,
3286 Value *&Start, Value *&Val,
3287 const SCEV *&ExtraOffsetExpr,
3288 bool &InvertedCond) {
3290 " Performing shift-until-zero idiom detection.\n");
3291
3292 // Give up if the loop has multiple blocks or multiple backedges.
3293 if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
3294 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
3295 return false;
3296 }
3297
3298 Instruction *ValShifted, *NBits, *IVNext;
3299 Value *ExtraOffset;
3300
3301 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
3302 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
3303 assert(LoopPreheaderBB && "There is always a loop preheader.");
3304
3305 using namespace PatternMatch;
3306
3307 // Step 1: Check if the loop backedge, condition is in desirable form.
3308
3309 CmpPredicate Pred;
3310 BasicBlock *TrueBB, *FalseBB;
3311 if (!match(LoopHeaderBB->getTerminator(),
3312 m_Br(m_Instruction(ValShiftedIsZero), m_BasicBlock(TrueBB),
3313 m_BasicBlock(FalseBB))) ||
3314 !match(ValShiftedIsZero,
3315 m_ICmp(Pred, m_Instruction(ValShifted), m_Zero())) ||
3316 !ICmpInst::isEquality(Pred)) {
3317 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
3318 return false;
3319 }
3320
3321 // Step 2: Check if the comparison's operand is in desirable form.
3322 // FIXME: Val could be a one-input PHI node, which we should look past.
3323 if (!match(ValShifted, m_Shift(m_LoopInvariant(m_Value(Val), CurLoop),
3324 m_Instruction(NBits)))) {
3325 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad comparisons value computation.\n");
3326 return false;
3327 }
3328 IntrinID = ValShifted->getOpcode() == Instruction::Shl ? Intrinsic::cttz
3329 : Intrinsic::ctlz;
3330
3331 // Step 3: Check if the shift amount is in desirable form.
3332
3333 if (match(NBits, m_c_Add(m_Instruction(IV),
3334 m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
3335 (NBits->hasNoSignedWrap() || NBits->hasNoUnsignedWrap()))
3336 ExtraOffsetExpr = SE->getNegativeSCEV(SE->getSCEV(ExtraOffset));
3337 else if (match(NBits,
3339 m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
3340 NBits->hasNoSignedWrap())
3341 ExtraOffsetExpr = SE->getSCEV(ExtraOffset);
3342 else {
3343 IV = NBits;
3344 ExtraOffsetExpr = SE->getZero(NBits->getType());
3345 }
3346
3347 // Step 4: Check if the recurrence is in desirable form.
3348 auto *IVPN = dyn_cast<PHINode>(IV);
3349 if (!IVPN || IVPN->getParent() != LoopHeaderBB) {
3350 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
3351 return false;
3352 }
3353
3354 Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
3355 IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
3356
3357 if (!IVNext || !match(IVNext, m_Add(m_Specific(IVPN), m_One()))) {
3358 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
3359 return false;
3360 }
3361
3362 // Step 4: Check if the backedge's destinations are in desirable form.
3363
3365 "Should only get equality predicates here.");
3366
3367 // cmp-br is commutative, so canonicalize to a single variant.
3368 InvertedCond = Pred != ICmpInst::Predicate::ICMP_EQ;
3369 if (InvertedCond) {
3370 Pred = ICmpInst::getInversePredicate(Pred);
3371 std::swap(TrueBB, FalseBB);
3372 }
3373
3374 // We expect to exit loop when comparison yields true,
3375 // so when it yields false we should branch back to loop header.
3376 if (FalseBB != LoopHeaderBB) {
3377 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
3378 return false;
3379 }
3380
3381 // The new, countable, loop will certainly only run a known number of
3382 // iterations, It won't be infinite. But the old loop might be infinite
3383 // under certain conditions. For logical shifts, the value will become zero
3384 // after at most bitwidth(%Val) loop iterations. However, for arithmetic
3385 // right-shift, iff the sign bit was set, the value will never become zero,
3386 // and the loop may never finish.
3387 if (ValShifted->getOpcode() == Instruction::AShr &&
3388 !isMustProgress(CurLoop) && !SE->isKnownNonNegative(SE->getSCEV(Val))) {
3389 LLVM_DEBUG(dbgs() << DEBUG_TYPE " Can not prove the loop is finite.\n");
3390 return false;
3391 }
3392
3393 // Okay, idiom checks out.
3394 return true;
3395}
3396
3397/// Look for the following loop:
3398/// \code
3399/// entry:
3400/// <...>
3401/// %start = <...>
3402/// %extraoffset = <...>
3403/// <...>
3404/// br label %loop
3405///
3406/// loop:
3407/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
3408/// %nbits = add nsw i8 %iv, %extraoffset
3409/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
3410/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
3411/// %iv.next = add i8 %iv, 1
3412/// <...>
3413/// br i1 %val.shifted.iszero, label %end, label %loop
3414///
3415/// end:
3416/// %iv.res = phi i8 [ %iv, %loop ] <...>
3417/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
3418/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
3419/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
3420/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
3421/// <...>
3422/// \endcode
3423///
3424/// And transform it into:
3425/// \code
3426/// entry:
3427/// <...>
3428/// %start = <...>
3429/// %extraoffset = <...>
3430/// <...>
3431/// %val.numleadingzeros = call i8 @llvm.ct{l,t}z.i8(i8 %val, i1 0)
3432/// %val.numactivebits = sub i8 8, %val.numleadingzeros
3433/// %extraoffset.neg = sub i8 0, %extraoffset
3434/// %tmp = add i8 %val.numactivebits, %extraoffset.neg
3435/// %iv.final = call i8 @llvm.smax.i8(i8 %tmp, i8 %start)
3436/// %loop.tripcount = sub i8 %iv.final, %start
3437/// br label %loop
3438///
3439/// loop:
3440/// %loop.iv = phi i8 [ 0, %entry ], [ %loop.iv.next, %loop ]
3441/// %loop.iv.next = add i8 %loop.iv, 1
3442/// %loop.ivcheck = icmp eq i8 %loop.iv.next, %loop.tripcount
3443/// %iv = add i8 %loop.iv, %start
3444/// <...>
3445/// br i1 %loop.ivcheck, label %end, label %loop
3446///
3447/// end:
3448/// %iv.res = phi i8 [ %iv.final, %loop ] <...>
3449/// <...>
3450/// \endcode
3451bool LoopIdiomRecognize::recognizeShiftUntilZero() {
3452 bool MadeChange = false;
3453
3454 Instruction *ValShiftedIsZero;
3455 Intrinsic::ID IntrID;
3456 Instruction *IV;
3457 Value *Start, *Val;
3458 const SCEV *ExtraOffsetExpr;
3459 bool InvertedCond;
3460 if (!detectShiftUntilZeroIdiom(CurLoop, SE, ValShiftedIsZero, IntrID, IV,
3461 Start, Val, ExtraOffsetExpr, InvertedCond)) {
3463 " shift-until-zero idiom detection failed.\n");
3464 return MadeChange;
3465 }
3466 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom detected!\n");
3467
3468 // Ok, it is the idiom we were looking for, we *could* transform this loop,
3469 // but is it profitable to transform?
3470
3471 BasicBlock *LoopHeaderBB = CurLoop->getHeader();
3472 BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
3473 assert(LoopPreheaderBB && "There is always a loop preheader.");
3474
3475 BasicBlock *SuccessorBB = CurLoop->getExitBlock();
3476 assert(SuccessorBB && "There is only a single successor.");
3477
3478 IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
3479 Builder.SetCurrentDebugLocation(IV->getDebugLoc());
3480
3481 Type *Ty = Val->getType();
3482 unsigned Bitwidth = Ty->getScalarSizeInBits();
3483
3486
3487 // The rewrite is considered to be unprofitable iff and only iff the
3488 // intrinsic we'll use are not cheap. Note that we are okay with *just*
3489 // making the loop countable, even if nothing else changes.
3491 IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getFalse()});
3492 InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
3495 " Intrinsic is too costly, not beneficial\n");
3496 return MadeChange;
3497 }
3498
3499 // Ok, transform appears worthwhile.
3500 MadeChange = true;
3501
3502 bool OffsetIsZero = ExtraOffsetExpr->isZero();
3503
3504 // Step 1: Compute the loop's final IV value / trip count.
3505
3506 Value *ValNumLeadingZeros = Builder.CreateIntrinsic(
3507 IntrID, Ty, {Val, /*is_zero_poison=*/Builder.getFalse()},
3508 /*FMFSource=*/nullptr, Val->getName() + ".numleadingzeros");
3509 Value *ValNumActiveBits = Builder.CreateSub(
3510 ConstantInt::get(Ty, Ty->getScalarSizeInBits()), ValNumLeadingZeros,
3511 Val->getName() + ".numactivebits", /*HasNUW=*/true,
3512 /*HasNSW=*/Bitwidth != 2);
3513
3514 SCEVExpander Expander(*SE, "loop-idiom");
3515 Expander.setInsertPoint(&*Builder.GetInsertPoint());
3516 Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
3517
3518 Value *ValNumActiveBitsOffset = Builder.CreateAdd(
3519 ValNumActiveBits, ExtraOffset, ValNumActiveBits->getName() + ".offset",
3520 /*HasNUW=*/OffsetIsZero, /*HasNSW=*/true);
3521 Value *IVFinal = Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
3522 {ValNumActiveBitsOffset, Start},
3523 /*FMFSource=*/nullptr, "iv.final");
3524
3525 auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
3526 IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
3527 /*HasNUW=*/OffsetIsZero, /*HasNSW=*/true));
3528 // FIXME: or when the offset was `add nuw`
3529
3530 // We know loop's backedge-taken count, but what's loop's trip count?
3531 Value *LoopTripCount =
3532 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
3533 CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
3534 /*HasNSW=*/Bitwidth != 2);
3535
3536 // Step 2: Adjust the successor basic block to receive the original
3537 // induction variable's final value instead of the orig. IV itself.
3538
3539 IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
3540
3541 // Step 3: Rewrite the loop into a countable form, with canonical IV.
3542
3543 // The new canonical induction variable.
3544 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());
3545 auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
3546
3547 // The induction itself.
3548 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->getFirstNonPHIIt());
3549 auto *CIVNext =
3550 Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() + ".next",
3551 /*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
3552
3553 // The loop trip count check.
3554 auto *CIVCheck = Builder.CreateICmpEQ(CIVNext, LoopTripCount,
3555 CurLoop->getName() + ".ivcheck");
3556 auto *NewIVCheck = CIVCheck;
3557 if (InvertedCond) {
3558 NewIVCheck = Builder.CreateNot(CIVCheck);
3559 NewIVCheck->takeName(ValShiftedIsZero);
3560 }
3561
3562 // The original IV, but rebased to be an offset to the CIV.
3563 auto *IVDePHId = Builder.CreateAdd(CIV, Start, "", /*HasNUW=*/false,
3564 /*HasNSW=*/true); // FIXME: what about NUW?
3565 IVDePHId->takeName(IV);
3566
3567 // The loop terminator.
3568 Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
3569 SmallVector<uint32_t> BranchWeights;
3570 const bool HasBranchWeights =
3572 extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
3573
3574 auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
3575 if (HasBranchWeights) {
3576 if (InvertedCond)
3577 std::swap(BranchWeights[0], BranchWeights[1]);
3578 // We're not changing the loop profile, so we can reuse the original loop's
3579 // profile.
3580 setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
3581 }
3582 LoopHeaderBB->getTerminator()->eraseFromParent();
3583
3584 // Populate the IV PHI.
3585 CIV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
3586 CIV->addIncoming(CIVNext, LoopHeaderBB);
3587
3588 // Step 4: Forget the "non-computable" trip-count SCEV associated with the
3589 // loop. The loop would otherwise not be deleted even if it becomes empty.
3590
3591 SE->forgetLoop(CurLoop);
3592
3593 // Step 5: Try to cleanup the loop's body somewhat.
3594 IV->replaceAllUsesWith(IVDePHId);
3595 IV->eraseFromParent();
3596
3597 ValShiftedIsZero->replaceAllUsesWith(NewIVCheck);
3598 ValShiftedIsZero->eraseFromParent();
3599
3600 // Other passes will take care of actually deleting the loop if possible.
3601
3602 LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom optimized!\n");
3603
3604 ++NumShiftUntilZero;
3605 return MadeChange;
3606}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
DXIL Resource Access
This file defines the DenseMap class.
#define DEBUG_TYPE
ManagedStatic< HTTPClientCleanup > Cleanup
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
static Value * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
static Value * matchShiftULTCondition(CondBrInst *BI, BasicBlock *LoopEntry, APInt &Threshold)
Check if the given conditional branch is based on an unsigned less-than comparison between a variable...
static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX, APInt &Threshold)
Return true if the idiom is detected in the loop.
static Value * matchCondition(CondBrInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)
Check if the given conditional branch is based on the comparison between a variable and zero,...
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, Value *&CurrX, Instruction *&NextX)
Return true if the idiom is detected in the loop.
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset....
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
static Value * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, ScalarEvolution *SE)
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
match_LoopInvariant< Ty > m_LoopInvariant(const Ty &M, const Loop *L)
Matches if the value is loop-invariant.
static bool isSameByteValueStore(Instruction &I, Value *SplatByte, Loop *L, const DataLayout &DL)
Return true if I is a (simple, loop-invariant-valued) store of the same bytewise value SplatByte.
static void deleteDeadInstruction(Instruction *I)
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
if(PassOpts->AAPipeline)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isSimple(Instruction *I)
verify safepoint Safepoint IR Verifier
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
std::optional< uint64_t > tryZExtValue() const
Get zero extended value if possible.
Definition APInt.h:1575
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:484
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
size_t size() const
Definition BasicBlock.h:482
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
BinaryOps getOpcode() const
Definition InstrTypes.h:409
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
This class represents a function call, abstracting a target machine's calling convention.
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition InstrTypes.h:831
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:828
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
void setCondition(Value *V)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
A debug info location.
Definition DebugLoc.h:126
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
PointerType * getType() const
Global values are always pointers.
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition GlobalValue.h:61
static LLVM_ABI CRCTable genSarwateTable(const APInt &GenPoly, bool IsBigEndian)
Generate a lookup table of 256 entries by interleaving the generating polynomial.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition IRBuilder.h:452
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2128
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Definition IRBuilder.h:221
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
LLVM_ABI bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
LLVM_ABI bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isShift() const
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
bool isUnordered() const
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
bool isPrecise() const
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
block_iterator block_begin() const
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LLVM_ABI PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition LoopInfo.cpp:659
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:67
ICmpInst * getLatchCmpInst() const
Get the latch condition instruction.
Definition LoopInfo.cpp:198
StringRef getName() const
Definition LoopInfo.h:407
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition LoopInfo.cpp:174
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
bool isForceInlined() const
bool isVolatile() const
Value * getValue() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
Representation for a specific memory location.
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
ConstantInt * getValue() const
const APInt & getAPInt() const
Helper to remove instructions inserted during SCEV expansion, unless they are marked as used.
This class uses information about analyze scalars to rewrite expressions in canonical form.
SCEVUse getOperand(unsigned i) const
This class represents an analyzed expression in the program.
LLVM_ABI bool isOne() const
Return true if the expression is a constant one.
static constexpr auto FlagNUW
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getZero(Type *Ty)
Return a SCEV for the constant 0 of a specific type.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
LLVM_ABI const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
A vector that has set insertion semantics.
Definition SetVector.h:57
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
Value * getValueOperand()
Value * getPointerOperand()
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
unsigned getWCharSize(const Module &M) const
Returns the size of the wchar_t type in bytes or 0 if the size is unknown.
bool has(LibFunc F) const
Tests whether a library function is available.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Basic
The cost of a typical 'add' instruction.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:270
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
Definition Value.cpp:611
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition Value.cpp:561
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
Value handle that is nullable, but tries to track the Value.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
const ParentTy * getParent() const
Definition ilist_node.h:34
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
@ HeaderSize
Definition BTF.h:61
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
OperandType
Operands are tagged with one of the values of this enum.
Definition MCInstrDesc.h:59
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
auto m_BasicBlock()
Match an arbitrary basic block value and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
specificloop_ty m_SpecificLoop(const Loop *L)
bool match(const SCEV *S, const Pattern &P)
specificscev_ty m_scev_Specific(const SCEV *S)
Match if we have a specific specified SCEV.
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
DiagnosticInfoOptimizationBase::setExtraArgs setExtraArgs
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
static cl::opt< bool, true > EnableLIRPWcslen("disable-loop-idiom-wcslen", cl::desc("Proceed with loop idiom recognize pass, " "enable conversion of loop(s) to wcslen."), cl::location(DisableLIRP::Wcslen), cl::init(false), cl::ReallyHidden)
InstructionCost Cost
static cl::opt< bool, true > DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memcpy."), cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static cl::opt< bool, true > DisableLIRPStrlen("disable-loop-idiom-strlen", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to strlen."), cl::location(DisableLIRP::Strlen), cl::init(false), cl::ReallyHidden)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
static cl::opt< bool > ForceMemsetPatternIntrinsic("loop-idiom-force-memset-pattern-intrinsic", cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false), cl::Hidden)
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
LLVM_ABI bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
LLVM_ABI bool isMustProgress(const Loop *L)
Return true if this loop can be assumed to make progress.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isModOrRefSet(const ModRefInfo MRI)
Definition ModRef.h:43
LLVM_ABI Value * emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI)
Emit a call to the strlen function to the builder, for the specified pointer.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition ModRef.h:28
@ ModRef
The access may reference and may modify the value stored in memory.
Definition ModRef.h:36
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
static cl::opt< bool, true > DisableLIRPHashRecognize("disable-" DEBUG_TYPE "-hashrecognize", cl::desc("Proceed with loop idiom recognize pass, " "but do not optimize CRC loops."), cl::location(DisableLIRP::HashRecognize), cl::init(false), cl::ReallyHidden)
TargetTransformInfo TTI
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
Definition InstrProf.h:145
LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI Value * emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI)
Emit a call to the wcslen function to the builder, for the specified pointer.
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
LLVM_ABI Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling " "with -Os/-Oz"), cl::init(true), cl::Hidden)
LLVM_ABI bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition Local.cpp:643
static cl::opt< bool, true > DisableLIRPMemset("disable-" DEBUG_TYPE "-memset", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memset."), cl::location(DisableLIRP::Memset), cl::init(false), cl::ReallyHidden)
static cl::opt< bool, true > DisableLIRPAll("disable-" DEBUG_TYPE "-all", cl::desc("Options to disable Loop Idiom Recognize Pass."), cl::location(DisableLIRP::All), cl::init(false), cl::ReallyHidden)
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI std::optional< DecomposedBitTest > decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, bool LookThroughTrunc=true, bool AllowNonZeroC=false, bool DecomposeAnd=false)
Decompose an icmp into the form ((X & Mask) pred C) if possible.
SCEVUseT< const SCEV * > SCEVUse
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
AAMDNodes extendTo(ssize_t Len) const
Create a new AAMDNode that describes this AAMDNode after extending it to apply to a series of bytes o...
Definition Metadata.h:836
static LLVM_ABI bool Memcpy
When true, Memcpy is disabled.
static LLVM_ABI bool Wcslen
When true, Wcslen is disabled.
static LLVM_ABI bool Strlen
When true, Strlen is disabled.
static LLVM_ABI bool HashRecognize
When true, HashRecognize is disabled.
static LLVM_ABI bool Memset
When true, Memset is disabled.
static LLVM_ABI bool All
When true, the entire pass is disabled.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
The structure that is returned when a polynomial algorithm was recognized by the analysis.
Match loop-invariant value.
match_LoopInvariant(const SubPattern_t &SP, const Loop *L)