// Extracted from the LLVM 23.0.0git doxygen page for DeadStoreElimination.cpp.
//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The code below implements dead store elimination using MemorySSA. It uses
// the following general approach: given a MemoryDef, walk upwards to find
// clobbering MemoryDefs that may be killed by the starting def. Then check
// that there are no uses that may read the location of the original MemoryDef
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
//    upwards.
// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
//    checking all uses starting at MaybeDeadAccess and walking until we see
//    StartDef.
// 3. For each found CurrentDef, check that:
//   1. There are no barrier instructions between CurrentDef and StartDef (like
//      throws or stores with ordering constraints).
//   2. StartDef is executed whenever CurrentDef is executed.
//   3. StartDef completely overwrites CurrentDef.
// 4. Erase CurrentDef from the function and MemorySSA.
//
//===----------------------------------------------------------------------===//
29
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <optional>
#include <utility>
91
92using namespace llvm;
93using namespace PatternMatch;
94
95#define DEBUG_TYPE "dse"
96
97STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
98STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
99STATISTIC(NumFastStores, "Number of stores deleted");
100STATISTIC(NumFastOther, "Number of other instrs removed");
101STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
102STATISTIC(NumModifiedStores, "Number of stores modified");
103STATISTIC(NumCFGChecks, "Number of stores modified");
104STATISTIC(NumCFGTries, "Number of stores modified");
105STATISTIC(NumCFGSuccess, "Number of stores modified");
106STATISTIC(NumGetDomMemoryDefPassed,
107 "Number of times a valid candidate is returned from getDomMemoryDef");
108STATISTIC(NumDomMemDefChecks,
109 "Number iterations check for reads in getDomMemoryDef");
110
111DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
112 "Controls which MemoryDefs are eliminated.");
113
114static cl::opt<bool>
115EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
116 cl::init(true), cl::Hidden,
117 cl::desc("Enable partial-overwrite tracking in DSE"));
118
119static cl::opt<bool>
120EnablePartialStoreMerging("enable-dse-partial-store-merging",
121 cl::init(true), cl::Hidden,
122 cl::desc("Enable partial store merging in DSE"));
123
125 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
126 cl::desc("The number of memory instructions to scan for "
127 "dead store elimination (default = 150)"));
129 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
130 cl::desc("The maximum number of steps while walking upwards to find "
131 "MemoryDefs that may be killed (default = 90)"));
132
134 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
135 cl::desc("The maximum number candidates that only partially overwrite the "
136 "killing MemoryDef to consider"
137 " (default = 5)"));
138
140 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
141 cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
142 "other stores per basic block (default = 5000)"));
143
145 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
146 cl::desc(
147 "The cost of a step in the same basic block as the killing MemoryDef"
148 "(default = 1)"));
149
151 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
153 cl::desc("The cost of a step in a different basic "
154 "block than the killing MemoryDef"
155 "(default = 5)"));
156
158 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
159 cl::desc("The maximum number of blocks to check when trying to prove that "
160 "all paths to an exit go through a killing block (default = 50)"));
161
162// This flags allows or disallows DSE to optimize MemorySSA during its
163// traversal. Note that DSE optimizing MemorySSA may impact other passes
164// downstream of the DSE invocation and can lead to issues not being
165// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
166// those cases, the flag can be used to check if DSE's MemorySSA optimizations
167// impact follow-up passes.
168static cl::opt<bool>
169 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
170 cl::desc("Allow DSE to optimize memory accesses."));
171
172// TODO: remove this flag.
174 "enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden,
175 cl::desc("Enable the initializes attr improvement in DSE"));
176
177//===----------------------------------------------------------------------===//
178// Helper functions
179//===----------------------------------------------------------------------===//
180using OverlapIntervalsTy = std::map<int64_t, int64_t>;
182
183/// Returns true if the end of this instruction can be safely shortened in
184/// length.
186 // Don't shorten stores for now
187 if (isa<StoreInst>(I))
188 return false;
189
191 switch (II->getIntrinsicID()) {
192 default: return false;
193 case Intrinsic::memset:
194 case Intrinsic::memcpy:
195 case Intrinsic::memcpy_element_unordered_atomic:
196 case Intrinsic::memset_element_unordered_atomic:
197 // Do shorten memory intrinsics.
198 // FIXME: Add memmove if it's also safe to transform.
199 return true;
200 }
201 }
202
203 // Don't shorten libcalls calls for now.
204
205 return false;
206}
207
208/// Returns true if the beginning of this instruction can be safely shortened
209/// in length.
211 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
212 // easily done by offsetting the source address.
213 return isa<AnyMemSetInst>(I);
214}
215
216static std::optional<TypeSize> getPointerSize(const Value *V,
217 const DataLayout &DL,
218 const TargetLibraryInfo &TLI,
219 const Function *F) {
221 ObjectSizeOpts Opts;
223
224 if (getObjectSize(V, Size, DL, &TLI, Opts))
225 return TypeSize::getFixed(Size);
226 return std::nullopt;
227}
228
namespace {

// Classification of how a killing store overlaps a dead store.
enum OverwriteResult {
  OW_Begin,                      // Killing store overwrites the dead store's start.
  OW_Complete,                   // Killing store completely overwrites the dead store.
  OW_End,                        // Killing store overwrites the dead store's end.
  OW_PartialEarlierWithFullLater,// Smaller killing store fully inside the dead one.
  OW_MaybePartial,               // Partial overwrite that needs further analysis.
  OW_None,                       // No overwrite.
  OW_Unknown                     // Nothing could be determined.
};

} // end anonymous namespace
242
243/// Check if two instruction are masked stores that completely
244/// overwrite one another. More specifically, \p KillingI has to
245/// overwrite \p DeadI.
246static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
247 const Instruction *DeadI,
249 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
250 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
251 if (KillingII == nullptr || DeadII == nullptr)
252 return OW_Unknown;
253 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
254 return OW_Unknown;
255
256 switch (KillingII->getIntrinsicID()) {
257 case Intrinsic::masked_store:
258 case Intrinsic::vp_store: {
259 const DataLayout &DL = KillingII->getDataLayout();
260 auto *KillingTy = KillingII->getArgOperand(0)->getType();
261 auto *DeadTy = DeadII->getArgOperand(0)->getType();
262 if (DL.getTypeSizeInBits(KillingTy) != DL.getTypeSizeInBits(DeadTy))
263 return OW_Unknown;
264 // Element count.
265 if (cast<VectorType>(KillingTy)->getElementCount() !=
266 cast<VectorType>(DeadTy)->getElementCount())
267 return OW_Unknown;
268 // Pointers.
269 Value *KillingPtr = KillingII->getArgOperand(1);
270 Value *DeadPtr = DeadII->getArgOperand(1);
271 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
272 return OW_Unknown;
273 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
274 // Masks.
275 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
276 if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
277 return OW_Unknown;
278 } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
279 // Masks.
280 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
281 if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
282 return OW_Unknown;
283 // Lengths.
284 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
285 return OW_Unknown;
286 }
287 return OW_Complete;
288 }
289 default:
290 return OW_Unknown;
291 }
292}
293
294/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
295/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
296/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
297/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
298/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
299/// overwritten by a killing (smaller) store which doesn't write outside the big
300/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
301/// NOTE: This function must only be called if both \p KillingLoc and \p
302/// DeadLoc belong to the same underlying object with valid \p KillingOff and
303/// \p DeadOff.
304static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
305 const MemoryLocation &DeadLoc,
306 int64_t KillingOff, int64_t DeadOff,
307 Instruction *DeadI,
309 const uint64_t KillingSize = KillingLoc.Size.getValue();
310 const uint64_t DeadSize = DeadLoc.Size.getValue();
311 // We may now overlap, although the overlap is not complete. There might also
312 // be other incomplete overlaps, and together, they might cover the complete
313 // dead store.
314 // Note: The correctness of this logic depends on the fact that this function
315 // is not even called providing DepWrite when there are any intervening reads.
317 KillingOff < int64_t(DeadOff + DeadSize) &&
318 int64_t(KillingOff + KillingSize) >= DeadOff) {
319
320 // Insert our part of the overlap into the map.
321 auto &IM = IOL[DeadI];
322 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
323 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
324 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
325 << ")\n");
326
327 // Make sure that we only insert non-overlapping intervals and combine
328 // adjacent intervals. The intervals are stored in the map with the ending
329 // offset as the key (in the half-open sense) and the starting offset as
330 // the value.
331 int64_t KillingIntStart = KillingOff;
332 int64_t KillingIntEnd = KillingOff + KillingSize;
333
334 // Find any intervals ending at, or after, KillingIntStart which start
335 // before KillingIntEnd.
336 auto ILI = IM.lower_bound(KillingIntStart);
337 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
338 // This existing interval is overlapped with the current store somewhere
339 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
340 // intervals and adjusting our start and end.
341 KillingIntStart = std::min(KillingIntStart, ILI->second);
342 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
343 ILI = IM.erase(ILI);
344
345 // Continue erasing and adjusting our end in case other previous
346 // intervals are also overlapped with the current store.
347 //
348 // |--- dead 1 ---| |--- dead 2 ---|
349 // |------- killing---------|
350 //
351 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
352 assert(ILI->second > KillingIntStart && "Unexpected interval");
353 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
354 ILI = IM.erase(ILI);
355 }
356 }
357
358 IM[KillingIntEnd] = KillingIntStart;
359
360 ILI = IM.begin();
361 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
362 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
363 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
364 << ") Composite KillingLoc [" << ILI->second << ", "
365 << ILI->first << ")\n");
366 ++NumCompletePartials;
367 return OW_Complete;
368 }
369 }
370
371 // Check for a dead store which writes to all the memory locations that
372 // the killing store writes to.
373 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
374 int64_t(DeadOff + DeadSize) > KillingOff &&
375 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
376 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
377 << ", " << int64_t(DeadOff + DeadSize)
378 << ") by a killing store [" << KillingOff << ", "
379 << int64_t(KillingOff + KillingSize) << ")\n");
380 // TODO: Maybe come up with a better name?
381 return OW_PartialEarlierWithFullLater;
382 }
383
384 // Another interesting case is if the killing store overwrites the end of the
385 // dead store.
386 //
387 // |--dead--|
388 // |-- killing --|
389 //
390 // In this case we may want to trim the size of dead store to avoid
391 // generating stores to addresses which will definitely be overwritten killing
392 // store.
394 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
395 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
396 return OW_End;
397
398 // Finally, we also need to check if the killing store overwrites the
399 // beginning of the dead store.
400 //
401 // |--dead--|
402 // |-- killing --|
403 //
404 // In this case we may want to move the destination address and trim the size
405 // of dead store to avoid generating stores to addresses which will definitely
406 // be overwritten killing store.
408 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
409 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
410 "Expect to be handled as OW_Complete");
411 return OW_Begin;
412 }
413 // Otherwise, they don't completely overlap.
414 return OW_Unknown;
415}
416
417/// Returns true if the memory which is accessed by the second instruction is not
418/// modified between the first and the second instruction.
419/// Precondition: Second instruction must be dominated by the first
420/// instruction.
421static bool
424 DominatorTree *DT) {
425 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
426 // instructions which can modify the memory location accessed by SecondI.
427 //
428 // While doing the walk keep track of the address to check. It might be
429 // different in different basic blocks due to PHI translation.
430 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
432 // Keep track of the address we visited each block with. Bail out if we
433 // visit a block with different addresses.
435
436 BasicBlock::iterator FirstBBI(FirstI);
437 ++FirstBBI;
438 BasicBlock::iterator SecondBBI(SecondI);
439 BasicBlock *FirstBB = FirstI->getParent();
440 BasicBlock *SecondBB = SecondI->getParent();
441 MemoryLocation MemLoc;
442 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
443 MemLoc = MemoryLocation::getForDest(MemSet);
444 else
445 MemLoc = MemoryLocation::get(SecondI);
446
447 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
448
449 // Start checking the SecondBB.
450 WorkList.push_back(
451 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
452 bool isFirstBlock = true;
453
454 // Check all blocks going backward until we reach the FirstBB.
455 while (!WorkList.empty()) {
456 BlockAddressPair Current = WorkList.pop_back_val();
457 BasicBlock *B = Current.first;
458 PHITransAddr &Addr = Current.second;
459 Value *Ptr = Addr.getAddr();
460
461 // Ignore instructions before FirstI if this is the FirstBB.
462 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
463
465 if (isFirstBlock) {
466 // Ignore instructions after SecondI if this is the first visit of SecondBB.
467 assert(B == SecondBB && "first block is not the store block");
468 EI = SecondBBI;
469 isFirstBlock = false;
470 } else {
471 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
472 // In this case we also have to look at instructions after SecondI.
473 EI = B->end();
474 }
475 for (; BI != EI; ++BI) {
476 Instruction *I = &*BI;
477 if (I->mayWriteToMemory() && I != SecondI)
478 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
479 return false;
480 }
481 if (B != FirstBB) {
482 assert(B != &FirstBB->getParent()->getEntryBlock() &&
483 "Should not hit the entry block because SI must be dominated by LI");
484 for (BasicBlock *Pred : predecessors(B)) {
485 PHITransAddr PredAddr = Addr;
486 if (PredAddr.needsPHITranslationFromBlock(B)) {
487 if (!PredAddr.isPotentiallyPHITranslatable())
488 return false;
489 if (!PredAddr.translateValue(B, Pred, DT, false))
490 return false;
491 }
492 Value *TranslatedPtr = PredAddr.getAddr();
493 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
494 if (!Inserted.second) {
495 // We already visited this block before. If it was with a different
496 // address - bail out!
497 if (TranslatedPtr != Inserted.first->second)
498 return false;
499 // ... otherwise just skip it.
500 continue;
501 }
502 WorkList.push_back(std::make_pair(Pred, PredAddr));
503 }
504 }
505 }
506 return true;
507}
508
509static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
510 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
511 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
512 const DataLayout &DL = Inst->getDataLayout();
513 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
514 uint64_t DeadSliceOffsetInBits =
515 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
516 auto SetDeadFragExpr = [](auto *Assign,
517 DIExpression::FragmentInfo DeadFragment) {
518 // createFragmentExpression expects an offset relative to the existing
519 // fragment offset if there is one.
520 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
521 Assign->getExpression()
522 ->getFragmentInfo()
523 .value_or(DIExpression::FragmentInfo(0, 0))
524 .OffsetInBits;
526 Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
527 Assign->setExpression(*NewExpr);
528 return;
529 }
530 // Failed to create a fragment expression for this so discard the value,
531 // making this a kill location.
533 DIExpression::get(Assign->getContext(), {}), DeadFragment.OffsetInBits,
534 DeadFragment.SizeInBits);
535 Assign->setExpression(Expr);
536 Assign->setKillLocation();
537 };
538
539 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
540 // link to any instructions. Created in the loop below (once).
541 DIAssignID *LinkToNothing = nullptr;
542 LLVMContext &Ctx = Inst->getContext();
543 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
544 if (!LinkToNothing)
545 LinkToNothing = DIAssignID::getDistinct(Ctx);
546 return LinkToNothing;
547 };
548
549 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
550 // overlapping dbg.assign intrinsic.
551 for (DbgVariableRecord *Assign : at::getDVRAssignmentMarkers(Inst)) {
552 std::optional<DIExpression::FragmentInfo> NewFragment;
553 if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
554 DeadSliceSizeInBits, Assign,
555 NewFragment) ||
556 !NewFragment) {
557 // We couldn't calculate the intersecting fragment for some reason. Be
558 // cautious and unlink the whole assignment from the store.
559 Assign->setKillAddress();
560 Assign->setAssignId(GetDeadLink());
561 continue;
562 }
563 // No intersect.
564 if (NewFragment->SizeInBits == 0)
565 continue;
566
567 // Fragments overlap: insert a new dbg.assign for this dead part.
568 auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
569 NewAssign->insertAfter(Assign->getIterator());
570 NewAssign->setAssignId(GetDeadLink());
571 if (NewFragment)
572 SetDeadFragExpr(NewAssign, *NewFragment);
573 NewAssign->setKillAddress();
574 }
575}
576
577/// Update the attributes given that a memory access is updated (the
578/// dereferenced pointer could be moved forward when shortening a
579/// mem intrinsic).
580static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo,
581 uint64_t PtrOffset) {
582 // Remember old attributes.
583 AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo);
584
585 // Find attributes that should be kept, and remove the rest.
586 AttributeMask AttrsToRemove;
587 for (auto &Attr : OldAttrs) {
588 if (Attr.hasKindAsEnum()) {
589 switch (Attr.getKindAsEnum()) {
590 default:
591 break;
592 case Attribute::Alignment:
593 // Only keep alignment if PtrOffset satisfy the alignment.
594 if (isAligned(Attr.getAlignment().valueOrOne(), PtrOffset))
595 continue;
596 break;
597 case Attribute::Dereferenceable:
598 case Attribute::DereferenceableOrNull:
599 // We could reduce the size of these attributes according to
600 // PtrOffset. But we simply drop these for now.
601 break;
602 case Attribute::NonNull:
603 case Attribute::NoUndef:
604 continue;
605 }
606 }
607 AttrsToRemove.addAttribute(Attr);
608 }
609
610 // Remove the attributes that should be dropped.
611 Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove);
612}
613
614static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
615 uint64_t &DeadSize, int64_t KillingStart,
616 uint64_t KillingSize, bool IsOverwriteEnd) {
617 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
618 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
619
620 // We assume that memet/memcpy operates in chunks of the "largest" native
621 // type size and aligned on the same value. That means optimal start and size
622 // of memset/memcpy should be modulo of preferred alignment of that type. That
623 // is it there is no any sense in trying to reduce store size any further
624 // since any "extra" stores comes for free anyway.
625 // On the other hand, maximum alignment we can achieve is limited by alignment
626 // of initial store.
627
628 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
629 // "largest" native type.
630 // Note: What is the proper way to get that value?
631 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
632 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
633
634 int64_t ToRemoveStart = 0;
635 uint64_t ToRemoveSize = 0;
636 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
637 // maintained on the remaining store.
638 if (IsOverwriteEnd) {
639 // Calculate required adjustment for 'KillingStart' in order to keep
640 // remaining store size aligned on 'PerfAlign'.
641 uint64_t Off =
642 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
643 ToRemoveStart = KillingStart + Off;
644 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
645 return false;
646 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
647 } else {
648 ToRemoveStart = DeadStart;
649 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
650 "Not overlapping accesses?");
651 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
652 // Calculate required adjustment for 'ToRemoveSize'in order to keep
653 // start of the remaining store aligned on 'PerfAlign'.
654 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
655 if (Off != 0) {
656 if (ToRemoveSize <= (PrefAlign.value() - Off))
657 return false;
658 ToRemoveSize -= PrefAlign.value() - Off;
659 }
660 assert(isAligned(PrefAlign, ToRemoveSize) &&
661 "Should preserve selected alignment");
662 }
663
664 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
665 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
666
667 uint64_t NewSize = DeadSize - ToRemoveSize;
668 if (DeadIntrinsic->isAtomic()) {
669 // When shortening an atomic memory intrinsic, the newly shortened
670 // length must remain an integer multiple of the element size.
671 const uint32_t ElementSize = DeadIntrinsic->getElementSizeInBytes();
672 if (0 != NewSize % ElementSize)
673 return false;
674 }
675
676 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
677 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
678 << "\n KILLER [" << ToRemoveStart << ", "
679 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
680
681 DeadIntrinsic->setLength(NewSize);
682 DeadIntrinsic->setDestAlignment(PrefAlign);
683
684 Value *OrigDest = DeadIntrinsic->getRawDest();
685 if (!IsOverwriteEnd) {
686 Value *Indices[1] = {
687 ConstantInt::get(DeadIntrinsic->getLength()->getType(), ToRemoveSize)};
689 Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
690 DeadI->getIterator());
691 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
692 DeadIntrinsic->setDest(NewDestGEP);
693 adjustArgAttributes(DeadIntrinsic, 0, ToRemoveSize);
694 }
695
696 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
697 shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
698 IsOverwriteEnd);
699
700 // Finally update start and size of dead access.
701 if (!IsOverwriteEnd)
702 DeadStart += ToRemoveSize;
703 DeadSize = NewSize;
704
705 return true;
706}
707
709 int64_t &DeadStart, uint64_t &DeadSize) {
710 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
711 return false;
712
713 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
714 int64_t KillingStart = OII->second;
715 uint64_t KillingSize = OII->first - KillingStart;
716
717 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
718
719 if (KillingStart > DeadStart &&
720 // Note: "KillingStart - KillingStart" is known to be positive due to
721 // preceding check.
722 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
723 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
724 // be non negative due to preceding checks.
725 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
726 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
727 true)) {
728 IntervalMap.erase(OII);
729 return true;
730 }
731 }
732 return false;
733}
734
737 int64_t &DeadStart, uint64_t &DeadSize) {
739 return false;
740
741 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
742 int64_t KillingStart = OII->second;
743 uint64_t KillingSize = OII->first - KillingStart;
744
745 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
746
747 if (KillingStart <= DeadStart &&
748 // Note: "DeadStart - KillingStart" is known to be non negative due to
749 // preceding check.
750 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
751 // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
752 // be positive due to preceding checks.
753 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
754 "Should have been handled as OW_Complete");
755 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
756 false)) {
757 IntervalMap.erase(OII);
758 return true;
759 }
760 }
761 return false;
762}
763
764static Constant *
766 int64_t KillingOffset, int64_t DeadOffset,
768 DominatorTree *DT) {
769
770 if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
771 DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
772 KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
773 DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
774 memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
775 // If the store we find is:
776 // a) partially overwritten by the store to 'Loc'
777 // b) the killing store is fully contained in the dead one and
778 // c) they both have a constant value
779 // d) none of the two stores need padding
780 // Merge the two stores, replacing the dead store's value with a
781 // merge of both values.
782 // TODO: Deal with other constant types (vectors, etc), and probably
783 // some mem intrinsics (if needed)
784
785 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
786 APInt KillingValue =
787 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
788 unsigned KillingBits = KillingValue.getBitWidth();
789 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
790 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
791
792 // Offset of the smaller store inside the larger store
793 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
794 unsigned LShiftAmount =
795 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
796 : BitOffsetDiff;
797 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
798 LShiftAmount + KillingBits);
799 // Clear the bits we'll be replacing, then OR with the smaller
800 // store, shifted appropriately.
801 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
802 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
803 << "\n Killing: " << *KillingI
804 << "\n Merged Value: " << Merged << '\n');
805 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
806 }
807 return nullptr;
808}
809
810// Returns true if \p I is an intrinsic that does not read or write memory.
813 switch (II->getIntrinsicID()) {
814 case Intrinsic::lifetime_start:
815 case Intrinsic::lifetime_end:
816 case Intrinsic::invariant_end:
817 case Intrinsic::launder_invariant_group:
818 case Intrinsic::assume:
819 return true;
820 case Intrinsic::dbg_declare:
821 case Intrinsic::dbg_label:
822 case Intrinsic::dbg_value:
823 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
824 default:
825 return false;
826 }
827 }
828 return false;
829}
830
831// Check if we can ignore \p D for DSE.
832static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
833 Instruction *DI = D->getMemoryInst();
834 // Calls that only access inaccessible memory cannot read or write any memory
835 // locations we consider for elimination.
836 if (auto *CB = dyn_cast<CallBase>(DI))
837 if (CB->onlyAccessesInaccessibleMemory())
838 return true;
839
840 // We can eliminate stores to locations not visible to the caller across
841 // throwing instructions.
842 if (DI->mayThrow() && !DefVisibleToCaller)
843 return true;
844
845 // We can remove the dead stores, irrespective of the fence and its ordering
846 // (release/acquire/seq_cst). Fences only constraints the ordering of
847 // already visible stores, it does not make a store visible to other
848 // threads. So, skipping over a fence does not change a store from being
849 // dead.
850 if (isa<FenceInst>(DI))
851 return true;
852
853 // Skip intrinsics that do not really read or modify memory.
854 if (isNoopIntrinsic(DI))
855 return true;
856
857 return false;
858}
859
860namespace {
861
862// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
863// defined by `MemDef`.
864struct MemoryLocationWrapper {
865 MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
866 bool DefByInitializesAttr)
867 : MemLoc(MemLoc), MemDef(MemDef),
868 DefByInitializesAttr(DefByInitializesAttr) {
869 assert(MemLoc.Ptr && "MemLoc should be not null");
870 UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
871 DefInst = MemDef->getMemoryInst();
872 }
873
874 MemoryLocation MemLoc;
875 const Value *UnderlyingObject;
876 MemoryDef *MemDef;
877 Instruction *DefInst;
878 bool DefByInitializesAttr = false;
879};
880
881// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
882// defined by this MemoryDef.
883struct MemoryDefWrapper {
884 MemoryDefWrapper(MemoryDef *MemDef,
885 ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
886 DefInst = MemDef->getMemoryInst();
887 for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
888 DefinedLocations.push_back(
889 MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
890 }
891 Instruction *DefInst;
893};
894
895struct ArgumentInitInfo {
896 unsigned Idx;
897 bool IsDeadOrInvisibleOnUnwind;
898 ConstantRangeList Inits;
899};
900} // namespace
901
904 return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
905}
906
907// Return the intersected range list of the initializes attributes of "Args".
908// "Args" are call arguments that alias to each other.
909// If any argument in "Args" doesn't have dead_on_unwind attr and
910// "CallHasNoUnwindAttr" is false, return empty.
913 bool CallHasNoUnwindAttr) {
914 if (Args.empty())
915 return {};
916
917 // To address unwind, the function should have nounwind attribute or the
918 // arguments have dead or invisible on unwind. Otherwise, return empty.
919 for (const auto &Arg : Args) {
920 if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
921 return {};
922 if (Arg.Inits.empty())
923 return {};
924 }
925
926 ConstantRangeList IntersectedIntervals = Args.front().Inits;
927 for (auto &Arg : Args.drop_front())
928 IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits);
929
930 return IntersectedIntervals;
931}
932
933namespace {
934
935struct DSEState {
936 Function &F;
937 AliasAnalysis &AA;
938 EarliestEscapeAnalysis EA;
939
940 /// The single BatchAA instance that is used to cache AA queries. It will
941 /// not be invalidated over the whole run. This is safe, because:
942 /// 1. Only memory writes are removed, so the alias cache for memory
943 /// locations remains valid.
944 /// 2. No new instructions are added (only instructions removed), so cached
945 /// information for a deleted value cannot be accessed by a re-used new
946 /// value pointer.
947 BatchAAResults BatchAA;
948
949 MemorySSA &MSSA;
950 DominatorTree &DT;
951 PostDominatorTree &PDT;
952 const TargetLibraryInfo &TLI;
953 const DataLayout &DL;
954 const CycleInfo &CI;
955
956 // All MemoryDefs that potentially could kill other MemDefs.
958 // Any that should be skipped as they are already deleted
959 SmallPtrSet<MemoryAccess *, 4> SkipStores;
960 // Keep track whether a given object is captured before return or not.
961 DenseMap<const Value *, bool> CapturedBeforeReturn;
962 // Keep track of all of the objects that are invisible to the caller after
963 // the function returns.
964 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
965 DenseMap<const Value *, uint64_t> InvisibleToCallerAfterRetBounded;
966 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
967 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
968 // Post-order numbers for each basic block. Used to figure out if memory
969 // accesses are executed before another access.
970 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
971
972 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
973 /// basic block.
974 MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
975 // Check if there are root nodes that are terminated by UnreachableInst.
976 // Those roots pessimize post-dominance queries. If there are such roots,
977 // fall back to CFG scan starting from all non-unreachable roots.
978 bool AnyUnreachableExit;
979
980 // Whether or not we should iterate on removing dead stores at the end of the
981 // function due to removing a store causing a previously captured pointer to
982 // no longer be captured.
983 bool ShouldIterateEndOfFunctionDSE;
984
985 /// Dead instructions to be removed at the end of DSE.
987
988 // Class contains self-reference, make sure it's not copied/moved.
989 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
990 PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
991 const CycleInfo &CI);
992 DSEState(const DSEState &) = delete;
993 DSEState &operator=(const DSEState &) = delete;
994
995 LocationSize strengthenLocationSize(const Instruction *I,
996 LocationSize Size) const;
997
998 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
999 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
1000 /// location (by \p DeadI instruction).
1001 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
1002 /// \p DeadI, but they both write to the same underlying object. In that
1003 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
1004 /// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the
1005 /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
1006 OverwriteResult isOverwrite(const Instruction *KillingI,
1007 const Instruction *DeadI,
1008 const MemoryLocation &KillingLoc,
1009 const MemoryLocation &DeadLoc,
1010 int64_t &KillingOff, int64_t &DeadOff);
1011
1012 bool isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
1013 const LocationSize StoreSize);
1014
1015 bool isInvisibleToCallerOnUnwind(const Value *V);
1016
1017 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const;
1018
1019 // Returns a list of <MemoryLocation, bool> pairs written by I.
1020 // The bool means whether the write is from Initializes attr.
1022 getLocForInst(Instruction *I, bool ConsiderInitializesAttr);
1023
1024 /// Assuming this instruction has a dead analyzable write, can we delete
1025 /// this instruction?
1026 bool isRemovable(Instruction *I);
1027
1028 /// Returns true if \p UseInst completely overwrites \p DefLoc
1029 /// (stored by \p DefInst).
1030 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1031 Instruction *UseInst);
1032
1033 /// Returns true if \p Def is not read before returning from the function.
1034 bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc);
1035
1036 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1037 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1038 /// indicating whether \p I is a free-like call.
1039 std::optional<std::pair<MemoryLocation, bool>>
1040 getLocForTerminator(Instruction *I) const;
1041
1042 /// Returns true if \p I is a memory terminator instruction like
1043 /// llvm.lifetime.end or free.
1044 bool isMemTerminatorInst(Instruction *I) const;
1045
1046 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1047 /// instruction \p AccessI.
1048 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1049 Instruction *MaybeTerm);
1050
1051 // Returns true if \p Use may read from \p DefLoc.
1052 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst);
1053
1054 /// Returns true if a dependency between \p Current and \p KillingDef is
1055 /// guaranteed to be loop invariant for the loops that they are in. Either
1056 /// because they are known to be in the same block, in the same loop level or
1057 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1058 /// during execution of the containing function.
1059 bool isGuaranteedLoopIndependent(const Instruction *Current,
1060 const Instruction *KillingDef,
1061 const MemoryLocation &CurrentLoc);
1062
1063 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1064 /// loop. In particular, this guarantees that it only references a single
1065 /// MemoryLocation during execution of the containing function.
1066 bool isGuaranteedLoopInvariant(const Value *Ptr);
1067
1068 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1069 // with no read access between them or on any other path to a function exit
1070 // block if \p KillingLoc is not accessible after the function returns. If
1071 // there is no such MemoryDef, return std::nullopt. The returned value may not
1072 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1073 // encounter an aliasing MemoryUse (read).
1074 std::optional<MemoryAccess *>
1075 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1076 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1077 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1078 bool IsMemTerm, unsigned &PartialLimit,
1079 bool IsInitializesAttrMemLoc);
1080
1081 /// Delete dead memory defs and recursively add their operands to ToRemove if
1082 /// they became dead.
1083 void
1084 deleteDeadInstruction(Instruction *SI,
1085 SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr);
1086
1087 // Check for any extra throws between \p KillingI and \p DeadI that block
1088 // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1089 // MemoryDef that may throw are handled during the walk from one def to the
1090 // next.
1091 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1092 const Value *KillingUndObj);
1093
1094 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1095 // instructions act as barriers:
1096 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1097 // object.
1098 // * Atomic stores stronger that monotonic.
1099 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI);
1100
1101 /// Eliminate writes to objects that are not visible in the caller and are not
1102 /// accessed before returning from the function.
1103 bool eliminateDeadWritesAtEndOfFunction();
1104
1105 /// If we have a zero initializing memset following a call to malloc,
1106 /// try folding it into a call to calloc.
1107 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO);
1108
1109 // Check if there is a dominating condition, that implies that the value
1110 // being stored in a ptr is already present in the ptr.
1111 bool dominatingConditionImpliesValue(MemoryDef *Def);
1112
1113 /// \returns true if \p Def is a no-op store, either because it
1114 /// directly stores back a loaded value or stores zero to a calloced object.
1115 bool storeIsNoop(MemoryDef *Def, const Value *DefUO);
1116
1117 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL);
1118
1119 /// Eliminates writes to locations where the value that is being written
1120 /// is already stored at the same location.
1121 bool eliminateRedundantStoresOfExistingValues();
1122
1123 // Return the locations written by the initializes attribute.
1124 // Note that this function considers:
1125 // 1. Unwind edge: use "initializes" attribute only if the callee has
1126 // "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
1127 // or the argument is invisible to caller on unwind. That is, we don't
1128 // perform incorrect DSE on unwind edges in the current function.
1129 // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
1130 // the intersected range list of their "initializes" attributes.
1131 SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);
1132
1133 // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
1134 // killed by `KillingLocWrapper.MemDef`. Return whether
1135 // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
1136 std::pair<bool, bool>
1137 eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);
1138
1139 // Try to eliminate dead defs killed by `KillingDefWrapper` and return the
1140 // change state: whether make any change.
1141 bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
1142};
1143
1144} // end anonymous namespace
1145
1146static void pushMemUses(MemoryAccess *Acc,
1149 for (Use &U : Acc->uses()) {
1150 auto *MA = cast<MemoryAccess>(U.getUser());
1151 if (Visited.insert(MA).second)
1152 WorkList.push_back(MA);
1153 }
1154}
1155
1156// Return true if "Arg" is function local and isn't captured before "CB".
1157static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
1159 const Value *UnderlyingObj = getUnderlyingObject(Arg);
1160 return isIdentifiedFunctionLocal(UnderlyingObj) &&
1162 EA.getCapturesBefore(UnderlyingObj, CB, /*OrAt*/ true));
1163}
1164
1165DSEState::DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
1167 const TargetLibraryInfo &TLI, const CycleInfo &CI)
1168 : F(F), AA(AA), EA(DT, nullptr, &CI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT),
1169 PDT(PDT), TLI(TLI), DL(F.getDataLayout()), CI(CI) {
1170 // Collect blocks with throwing instructions not modeled in MemorySSA and
1171 // alloc-like objects.
1172 unsigned PO = 0;
1173 for (BasicBlock *BB : post_order(&F)) {
1174 PostOrderNumbers[BB] = PO++;
1175 for (Instruction &I : *BB) {
1176 MemoryAccess *MA = MSSA.getMemoryAccess(&I);
1177 if (I.mayThrow() && !MA)
1178 ThrowingBlocks.insert(I.getParent());
1179
1180 auto *MD = dyn_cast_or_null<MemoryDef>(MA);
1181 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
1182 (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
1184 MemDefs.push_back(MD);
1185 }
1186 }
1187
1188 // Treat byval, inalloca or dead on return arguments the same as Allocas,
1189 // stores to them are dead at the end of the function.
1190 for (Argument &AI : F.args()) {
1191 if (AI.hasPassPointeeByValueCopyAttr()) {
1192 InvisibleToCallerAfterRet.insert({&AI, true});
1193 continue;
1194 }
1195
1196 if (!AI.getType()->isPointerTy())
1197 continue;
1198
1199 const DeadOnReturnInfo &Info = AI.getDeadOnReturnInfo();
1200 if (Info.coversAllReachableMemory())
1201 InvisibleToCallerAfterRet.insert({&AI, true});
1202 else if (uint64_t DeadBytes = Info.getNumberOfDeadBytes())
1203 InvisibleToCallerAfterRetBounded.insert({&AI, DeadBytes});
1204 }
1205
1206 AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
1207 return isa<UnreachableInst>(E->getTerminator());
1208 });
1209}
1210
1211LocationSize DSEState::strengthenLocationSize(const Instruction *I,
1212 LocationSize Size) const {
1213 if (auto *CB = dyn_cast<CallBase>(I)) {
1214 LibFunc F;
1215 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
1216 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
1217 // Use the precise location size specified by the 3rd argument
1218 // for determining KillingI overwrites DeadLoc if it is a memset_chk
1219 // instruction. memset_chk will write either the amount specified as 3rd
1220 // argument or the function will immediately abort and exit the program.
1221 // NOTE: AA may determine NoAlias if it can prove that the access size
1222 // is larger than the allocation size due to that being UB. To avoid
1223 // returning potentially invalid NoAlias results by AA, limit the use of
1224 // the precise location size to isOverwrite.
1225 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
1226 return LocationSize::precise(Len->getZExtValue());
1227 }
1228 }
1229 return Size;
1230}
1231
// Classify how a write to KillingLoc (by KillingI) relates to a write to
// DeadLoc (by DeadI): complete overwrite, possible partial overwrite (with
// the constant offsets reported via KillingOff/DeadOff), no overlap, or
// unknown.
OverwriteResult DSEState::isOverwrite(const Instruction *KillingI,
                                      const Instruction *DeadI,
                                      const MemoryLocation &KillingLoc,
                                      const MemoryLocation &DeadLoc,
                                      int64_t &KillingOff, int64_t &DeadOff) {
  // AliasAnalysis does not always account for loops. Limit overwrite checks
  // to dependencies for which we can guarantee they are independent of any
  // loops they are in.
  if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
    return OW_Unknown;

  LocationSize KillingLocSize =
      strengthenLocationSize(KillingI, KillingLoc.Size);
  const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
  const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
  const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
  const Value *KillingUndObj = getUnderlyingObject(KillingPtr);

  // Check whether the killing store overwrites the whole object, in which
  // case the size/offset of the dead store does not matter.
  if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
      isIdentifiedObject(KillingUndObj)) {
    std::optional<TypeSize> KillingUndObjSize =
        getPointerSize(KillingUndObj, DL, TLI, &F);
    if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
      return OW_Complete;
  }

  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
  // get imprecise values here, though (except for unknown sizes).
  if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
    // In case no constant size is known, try to compare the IR values for the
    // number of bytes written and check if they match.
    const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
    const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
    if (KillingMemI && DeadMemI) {
      const Value *KillingV = KillingMemI->getLength();
      const Value *DeadV = DeadMemI->getLength();
      // Identical length value + must-alias pointers => complete overwrite.
      if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
        return OW_Complete;
    }

    // Masked stores have imprecise locations, but we can reason about them
    // to some extent.
    return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
  }

  const TypeSize KillingSize = KillingLocSize.getValue();
  const TypeSize DeadSize = DeadLoc.Size.getValue();
  // Bail on doing Size comparison which depends on AA for now
  // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
  const bool AnyScalable = DeadSize.isScalable() || KillingLocSize.isScalable();

  if (AnyScalable)
    return OW_Unknown;
  // Query the alias information
  AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);

  // If the start pointers are the same, we just have to compare sizes to see if
  // the killing store was larger than the dead store.
  if (AAR == AliasResult::MustAlias) {
    // Make sure that the KillingSize size is >= the DeadSize size.
    if (KillingSize >= DeadSize)
      return OW_Complete;
  }

  // If we hit a partial alias we may have a full overwrite
  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
    int32_t Off = AAR.getOffset();
    if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
      return OW_Complete;
  }

  // If we can't resolve the same pointers to the same object, then we can't
  // analyze them at all.
  if (DeadUndObj != KillingUndObj) {
    // Non aliasing stores to different objects don't overlap. Note that
    // if the killing store is known to overwrite whole object (out of
    // bounds access overwrites whole object as well) then it is assumed to
    // completely overwrite any store to the same object even if they don't
    // actually alias (see next check).
    if (AAR == AliasResult::NoAlias)
      return OW_None;
    return OW_Unknown;
  }

  // Okay, we have stores to two completely different pointers. Try to
  // decompose the pointer into a "base + constant_offset" form. If the base
  // pointers are equal, then we can reason about the two stores.
  DeadOff = 0;
  KillingOff = 0;
  const Value *DeadBasePtr =
      GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
  const Value *KillingBasePtr =
      GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);

  // If the base pointers still differ, we have two completely different
  // stores.
  if (DeadBasePtr != KillingBasePtr)
    return OW_Unknown;

  // The killing access completely overlaps the dead store if and only if
  // both start and end of the dead one is "inside" the killing one:
  //    |<->|--dead--|<->|
  //    |-----killing------|
  // Accesses may overlap if and only if start of one of them is "inside"
  // another one:
  //    |<->|--dead--|<-------->|
  //    |-------killing--------|
  //           OR
  //    |-------dead-------|
  //    |<->|---killing---|<----->|
  //
  // We have to be careful here as *Off is signed while *.Size is unsigned.

  // Check if the dead access starts "not before" the killing one.
  if (DeadOff >= KillingOff) {
    // If the dead access ends "not after" the killing access then the
    // dead one is completely overwritten by the killing one.
    if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
      return OW_Complete;
    // If start of the dead access is "before" end of the killing access
    // then accesses overlap.
    else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
      return OW_MaybePartial;
  }
  // If start of the killing access is "before" end of the dead access then
  // accesses overlap.
  else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
    return OW_MaybePartial;
  }

  // Can reach here only if accesses are known not to overlap.
  return OW_None;
}
1367
// Returns true if a store of StoreSize bytes through Ptr into object V cannot
// be observed by the caller once this function returns.
bool DSEState::isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
                                           const LocationSize StoreSize) {
  // Allocas trivially die when the function returns.
  if (isa<AllocaInst>(V))
    return true;

  // Arguments with a bounded dead-after-return byte range (see
  // InvisibleToCallerAfterRetBounded): the store is invisible only if it
  // provably lands inside that range.
  auto IBounded = InvisibleToCallerAfterRetBounded.find(V);
  if (IBounded != InvisibleToCallerAfterRetBounded.end()) {
    int64_t ValueOffset;
    [[maybe_unused]] const Value *BaseValue =
        GetPointerBaseWithConstantOffset(Ptr, ValueOffset, DL);
    // If we are not able to find a constant offset from the UO, we have to
    // pessimistically assume that the store writes to memory out of the
    // dead_on_return bounds.
    if (BaseValue != V)
      return false;
    // This store is only invisible after return if we are in bounds of the
    // range marked dead.
    if (StoreSize.hasValue() &&
        ValueOffset + StoreSize.getValue() <= IBounded->second &&
        ValueOffset >= 0)
      return true;
  }
  // Otherwise compute (and cache) a per-object answer: only a no-alias call
  // result that is invisible on unwind and is never captured (including via
  // return values) is invisible after return.
  auto I = InvisibleToCallerAfterRet.insert({V, false});
  if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
    I.first->second = capturesNothing(PointerMayBeCaptured(
        V, /*ReturnCaptures=*/true, CaptureComponents::Provenance));
  return I.first->second;
}
1396
1397bool DSEState::isInvisibleToCallerOnUnwind(const Value *V) {
1398 bool RequiresNoCaptureBeforeUnwind;
1399 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1400 return false;
1401 if (!RequiresNoCaptureBeforeUnwind)
1402 return true;
1403
1404 auto I = CapturedBeforeReturn.insert({V, true});
1405 if (I.second)
1406 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1407 // with the killing MemoryDef. But we refrain from doing so for now to
1408 // limit compile-time and this does not cause any changes to the number
1409 // of stores removed on a large test set in practice.
1410 I.first->second = capturesAnything(PointerMayBeCaptured(
1411 V, /*ReturnCaptures=*/false, CaptureComponents::Provenance));
1412 return !I.first->second;
1413}
1414
1415std::optional<MemoryLocation> DSEState::getLocForWrite(Instruction *I) const {
1416 if (!I->mayWriteToMemory())
1417 return std::nullopt;
1418
1419 if (auto *CB = dyn_cast<CallBase>(I))
1420 return MemoryLocation::getForDest(CB, TLI);
1421
1423}
1424
1426DSEState::getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
1428 if (isMemTerminatorInst(I)) {
1429 if (auto Loc = getLocForTerminator(I))
1430 Locations.push_back(std::make_pair(Loc->first, false));
1431 return Locations;
1432 }
1433
1434 if (auto Loc = getLocForWrite(I))
1435 Locations.push_back(std::make_pair(*Loc, false));
1436
1437 if (ConsiderInitializesAttr) {
1438 for (auto &MemLoc : getInitializesArgMemLoc(I)) {
1439 Locations.push_back(std::make_pair(MemLoc, true));
1440 }
1441 }
1442 return Locations;
1443}
1444
1445bool DSEState::isRemovable(Instruction *I) {
1446 assert(getLocForWrite(I) && "Must have analyzable write");
1447
1448 // Don't remove volatile/atomic stores.
1449 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1450 return SI->isUnordered();
1451
1452 if (auto *CB = dyn_cast<CallBase>(I)) {
1453 // Don't remove volatile memory intrinsics.
1454 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1455 return !MI->isVolatile();
1456
1457 // Never remove dead lifetime intrinsics, e.g. because they are followed
1458 // by a free.
1459 if (CB->isLifetimeStartOrEnd())
1460 return false;
1461
1462 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1463 !CB->isTerminator();
1464 }
1465
1466 return false;
1467}
1468
1469bool DSEState::isCompleteOverwrite(const MemoryLocation &DefLoc,
1470 Instruction *DefInst, Instruction *UseInst) {
1471 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1472 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1473 // MemoryDef.
1474 if (!UseInst->mayWriteToMemory())
1475 return false;
1476
1477 if (auto *CB = dyn_cast<CallBase>(UseInst))
1478 if (CB->onlyAccessesInaccessibleMemory())
1479 return false;
1480
1481 int64_t InstWriteOffset, DepWriteOffset;
1482 if (auto CC = getLocForWrite(UseInst))
1483 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1484 DepWriteOffset) == OW_Complete;
1485 return false;
1486}
1487
1488bool DSEState::isWriteAtEndOfFunction(MemoryDef *Def,
1489 const MemoryLocation &DefLoc) {
1490 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1491 << *Def->getMemoryInst()
1492 << ") is at the end the function \n");
1494 SmallPtrSet<MemoryAccess *, 8> Visited;
1495
1496 pushMemUses(Def, WorkList, Visited);
1497 for (unsigned I = 0; I < WorkList.size(); I++) {
1498 if (WorkList.size() >= MemorySSAScanLimit) {
1499 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1500 return false;
1501 }
1502
1503 MemoryAccess *UseAccess = WorkList[I];
1504 if (isa<MemoryPhi>(UseAccess)) {
1505 // AliasAnalysis does not account for loops. Limit elimination to
1506 // candidates for which we can guarantee they always store to the same
1507 // memory location.
1508 if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
1509 return false;
1510
1511 pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
1512 continue;
1513 }
1514 // TODO: Checking for aliasing is expensive. Consider reducing the amount
1515 // of times this is called and/or caching it.
1516 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1517 if (isReadClobber(DefLoc, UseInst)) {
1518 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1519 return false;
1520 }
1521
1522 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1523 pushMemUses(UseDef, WorkList, Visited);
1524 }
1525 return true;
1526}
1527
1528std::optional<std::pair<MemoryLocation, bool>>
1529DSEState::getLocForTerminator(Instruction *I) const {
1530 if (auto *CB = dyn_cast<CallBase>(I)) {
1531 if (CB->getIntrinsicID() == Intrinsic::lifetime_end)
1532 return {
1533 std::make_pair(MemoryLocation::getForArgument(CB, 0, &TLI), false)};
1534 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1535 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1536 }
1537
1538 return std::nullopt;
1539}
1540
1541bool DSEState::isMemTerminatorInst(Instruction *I) const {
1542 auto *CB = dyn_cast<CallBase>(I);
1543 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1544 getFreedOperand(CB, &TLI) != nullptr);
1545}
1546
1547bool DSEState::isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1548 Instruction *MaybeTerm) {
1549 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1550 getLocForTerminator(MaybeTerm);
1551
1552 if (!MaybeTermLoc)
1553 return false;
1554
1555 // If the terminator is a free-like call, all accesses to the underlying
1556 // object can be considered terminated.
1557 if (getUnderlyingObject(Loc.Ptr) !=
1558 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1559 return false;
1560
1561 auto TermLoc = MaybeTermLoc->first;
1562 if (MaybeTermLoc->second) {
1563 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1564 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1565 }
1566 int64_t InstWriteOffset = 0;
1567 int64_t DepWriteOffset = 0;
1568 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1569 DepWriteOffset) == OW_Complete;
1570}
1571
1572bool DSEState::isReadClobber(const MemoryLocation &DefLoc,
1573 Instruction *UseInst) {
1574 if (isNoopIntrinsic(UseInst))
1575 return false;
1576
1577 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1578 // treated as read clobber.
1579 if (auto SI = dyn_cast<StoreInst>(UseInst))
1580 return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1581
1582 if (!UseInst->mayReadFromMemory())
1583 return false;
1584
1585 if (auto *CB = dyn_cast<CallBase>(UseInst))
1586 if (CB->onlyAccessesInaccessibleMemory())
1587 return false;
1588
1589 return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1590}
1591
1592bool DSEState::isGuaranteedLoopIndependent(const Instruction *Current,
1593 const Instruction *KillingDef,
1594 const MemoryLocation &CurrentLoc) {
1595 // If the dependency is within the same block or loop level (being careful
1596 // of irreducible loops), we know that AA will return a valid result for the
1597 // memory dependency. (Both at the function level, outside of any loop,
1598 // would also be valid but we currently disable that to limit compile time).
1599 if (Current->getParent() == KillingDef->getParent())
1600 return true;
1601 const Cycle *CurrentC = CI.getCycle(Current->getParent());
1602 if (CurrentC && CurrentC == CI.getCycle(KillingDef->getParent()))
1603 return true;
1604 // Otherwise check the memory location is invariant to any loops.
1605 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1606}
1607
1608bool DSEState::isGuaranteedLoopInvariant(const Value *Ptr) {
1609 Ptr = Ptr->stripPointerCasts();
1610 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1611 if (GEP->hasAllConstantIndices())
1612 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1613
1614 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1615 return I->getParent()->isEntryBlock() || !CI.getCycle(I->getParent());
1616 }
1617 return true;
1618}
1619
// Starting at StartAccess, walk up the MemorySSA def-use chain to find the
// next MemoryDef that KillingDef may kill ("MaybeDeadAccess"), then verify
// that no access in between may read the dead location. Returns:
//   - std::nullopt when a budget is exhausted or a barrier/read-clobber makes
//     elimination on this path impossible,
//   - a MemoryPhi, which the caller is responsible for traversing, or
//   - the candidate dead MemoryDef.
// ScanLimit, WalkerStepLimit and PartialLimit are in-out budgets shared
// across repeated calls for the same KillingDef.
1620 std::optional<MemoryAccess *> DSEState::getDomMemoryDef(
1621 MemoryDef *KillingDef, MemoryAccess *StartAccess,
1622 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1623 unsigned &ScanLimit, unsigned &WalkerStepLimit, bool IsMemTerm,
1624 unsigned &PartialLimit, bool IsInitializesAttrMemLoc) {
// Bail immediately when a previous call already used up a budget.
1625 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1626 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1627 return std::nullopt;
1628 }
1629
1630 MemoryAccess *Current = StartAccess;
1631 Instruction *KillingI = KillingDef->getMemoryInst();
1632 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1633
1634 // Only optimize defining access of KillingDef when directly starting at its
1635 // defining access. The defining access also must only access KillingLoc. At
1636 // the moment we only support instructions with a single write location, so
1637 // it should be sufficient to disable optimizations for instructions that
1638 // also read from memory.
1639 bool CanOptimize = OptimizeMemorySSA &&
1640 KillingDef->getDefiningAccess() == StartAccess &&
1641 !KillingI->mayReadFromMemory();
1642
1643 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1644 std::optional<MemoryLocation> CurrentLoc;
1645 for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1646 LLVM_DEBUG({
1647 dbgs() << " visiting " << *Current;
1648 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1649 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1650 << ")";
1651 dbgs() << "\n";
1652 });
1653
1654 // Reached TOP.
1655 if (MSSA.isLiveOnEntryDef(Current)) {
1656 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1657 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1658 // The first clobbering def is... none.
1659 KillingDef->setOptimized(Current);
1660 return std::nullopt;
1661 }
1662
1663 // Cost of a step. Accesses in the same block are more likely to be valid
1664 // candidates for elimination, hence consider them cheaper.
1665 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
// NOTE(review): the two value lines of this ?: (original lines 1666-1667,
// presumably the same-BB and other-BB step cost constants) are missing from
// this extract — verify against upstream.
1668 if (WalkerStepLimit <= StepCost) {
1669 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1670 return std::nullopt;
1671 }
1672 WalkerStepLimit -= StepCost;
1673
1674 // Return for MemoryPhis. They cannot be eliminated directly and the
1675 // caller is responsible for traversing them.
1676 if (isa<MemoryPhi>(Current)) {
1677 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1678 return Current;
1679 }
1680
1681 // Below, check if CurrentDef is a valid candidate to be eliminated by
1682 // KillingDef. If it is not, check the next candidate.
1683 MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1684 Instruction *CurrentI = CurrentDef->getMemoryInst();
1685
// Skippable defs (e.g. no-op accesses) are ignored, but they end the
// stretch over which the optimized-access shortcut may be recorded.
1686 if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1687 CanOptimize = false;
1688 continue;
1689 }
1690
1691 // Before we try to remove anything, check for any extra throwing
1692 // instructions that block us from DSEing
1693 if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1694 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1695 return std::nullopt;
1696 }
1697
1698 // Check for anything that looks like it will be a barrier to further
1699 // removal
1700 if (isDSEBarrier(KillingUndObj, CurrentI)) {
1701 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1702 return std::nullopt;
1703 }
1704
1705 // If Current is known to be on path that reads DefLoc or is a read
1706 // clobber, bail out, as the path is not profitable. We skip this check
1707 // for intrinsic calls, because the code knows how to handle memcpy
1708 // intrinsics.
1709 if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1710 return std::nullopt;
1711
1712 // Quick check if there are direct uses that are read-clobbers.
1713 if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1714 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1715 return !MSSA.dominates(StartAccess, UseOrDef) &&
1716 isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1717 return false;
1718 })) {
1719 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1720 return std::nullopt;
1721 }
1722
1723 // If Current does not have an analyzable write location or is not
1724 // removable, skip it.
1725 CurrentLoc = getLocForWrite(CurrentI);
1726 if (!CurrentLoc || !isRemovable(CurrentI)) {
1727 CanOptimize = false;
1728 continue;
1729 }
1730
1731 // AliasAnalysis does not account for loops. Limit elimination to
1732 // candidates for which we can guarantee they always store to the same
1733 // memory location and not located in different loops.
1734 if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1735 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1736 CanOptimize = false;
1737 continue;
1738 }
1739
1740 if (IsMemTerm) {
1741 // If the killing def is a memory terminator (e.g. lifetime.end), check
1742 // the next candidate if the current Current does not write the same
1743 // underlying object as the terminator.
1744 if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1745 CanOptimize = false;
1746 continue;
1747 }
1748 } else {
1749 int64_t KillingOffset = 0;
1750 int64_t DeadOffset = 0;
1751 auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1752 KillingOffset, DeadOffset);
1753 if (CanOptimize) {
1754 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1755 // optimized access. Do not optimize if CurrentDef is already the
1756 // defining access of KillingDef.
1757 if (CurrentDef != KillingDef->getDefiningAccess() &&
1758 (OR == OW_Complete || OR == OW_MaybePartial))
1759 KillingDef->setOptimized(CurrentDef);
1760
1761 // Once a may-aliasing def is encountered do not set an optimized
1762 // access.
1763 if (OR != OW_None)
1764 CanOptimize = false;
1765 }
1766
1767 // If Current does not write to the same object as KillingDef, check
1768 // the next candidate.
1769 if (OR == OW_Unknown || OR == OW_None)
1770 continue;
1771 else if (OR == OW_MaybePartial) {
1772 // If KillingDef only partially overwrites Current, check the next
1773 // candidate if the partial step limit is exceeded. This aggressively
1774 // limits the number of candidates for partial store elimination,
1775 // which are less likely to be removable in the end.
1776 if (PartialLimit <= 1) {
1777 WalkerStepLimit -= 1;
1778 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with "
1779 "next access\n");
1780 continue;
1781 }
1782 PartialLimit -= 1;
1783 }
1784 }
// A suitable candidate was found; leave the upward walk.
1785 break;
1786 };
1787
1788 // Accesses to objects accessible after the function returns can only be
1789 // eliminated if the access is dead along all paths to the exit. Collect
1790 // the blocks with killing (=completely overwriting MemoryDefs) and check if
1791 // they cover all paths from MaybeDeadAccess to any function exit.
1792 SmallPtrSet<Instruction *, 16> KillingDefs;
1793 KillingDefs.insert(KillingDef->getMemoryInst());
1794 MemoryAccess *MaybeDeadAccess = Current;
1795 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1796 Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1797 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1798 << *MaybeDeadI << ")\n");
1799
// NOTE(review): the worklist declaration (original line 1800, presumably
// a SmallVector of MemoryAccess* named WorkList) is missing from this
// extract — verify against upstream.
1801 SmallPtrSet<MemoryAccess *, 32> Visited;
1802 pushMemUses(MaybeDeadAccess, WorkList, Visited);
1803
1804 // Check if DeadDef may be read.
1805 for (unsigned I = 0; I < WorkList.size(); I++) {
1806 MemoryAccess *UseAccess = WorkList[I];
1807
1808 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1809 // Bail out if the number of accesses to check exceeds the scan limit.
1810 if (ScanLimit < (WorkList.size() - I)) {
1811 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1812 return std::nullopt;
1813 }
1814 --ScanLimit;
1815 NumDomMemDefChecks++;
1816
// MemoryPhis are transparent for this check: traverse through them unless
// every path to the phi is already covered by a killing def.
1817 if (isa<MemoryPhi>(UseAccess)) {
1818 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1819 return DT.properlyDominates(KI->getParent(), UseAccess->getBlock());
1820 })) {
1821 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1822 continue;
1823 }
1824 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1825 pushMemUses(UseAccess, WorkList, Visited);
1826 continue;
1827 }
1828
1829 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1830 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1831
1832 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1833 return DT.dominates(KI, UseInst);
1834 })) {
1835 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1836 continue;
1837 }
1838
1839 // A memory terminator kills all preceeding MemoryDefs and all succeeding
1840 // MemoryAccesses. We do not have to check it's users.
1841 if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1842 LLVM_DEBUG(
1843 dbgs()
1844 << " ... skipping, memterminator invalidates following accesses\n");
1845 continue;
1846 }
1847
1848 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1849 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1850 pushMemUses(UseAccess, WorkList, Visited);
1851 continue;
1852 }
1853
1854 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1855 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1856 return std::nullopt;
1857 }
1858
1859 // Uses which may read the original MemoryDef mean we cannot eliminate the
1860 // original MD. Stop walk.
1861 // If KillingDef is a CallInst with "initializes" attribute, the reads in
1862 // the callee would be dominated by initializations, so it should be safe.
1863 bool IsKillingDefFromInitAttr = false;
1864 if (IsInitializesAttrMemLoc) {
1865 if (KillingI == UseInst &&
1866 KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr))
1867 IsKillingDefFromInitAttr = true;
1868 }
1869
1870 if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
1871 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1872 return std::nullopt;
1873 }
1874
1875 // If this worklist walks back to the original memory access (and the
1876 // pointer is not guarenteed loop invariant) then we cannot assume that a
1877 // store kills itself.
1878 if (MaybeDeadAccess == UseAccess &&
1879 !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1880 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1881 return std::nullopt;
1882 }
1883 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1884 // if it reads the memory location.
1885 // TODO: It would probably be better to check for self-reads before
1886 // calling the function.
1887 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1888 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1889 continue;
1890 }
1891
1892 // Check all uses for MemoryDefs, except for defs completely overwriting
1893 // the original location. Otherwise we have to check uses of *all*
1894 // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1895 // miss cases like the following
1896 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1897 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1898 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1899 // (The Use points to the *first* Def it may alias)
1900 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1901 // stores [0,1]
1902 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1903 if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1904 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1905 if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1906 PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1907 if (!isInvisibleToCallerAfterRet(KillingUndObj, KillingLoc.Ptr,
1908 KillingLoc.Size)) {
// NOTE(review): the LLVM_DEBUG(dbgs() opener (original line 1909) is
// missing from this extract — verify against upstream.
1910 << " ... found killing def " << *UseInst << "\n");
1911 KillingDefs.insert(UseInst);
1912 }
1913 } else {
// NOTE(review): the LLVM_DEBUG(dbgs() opener (original line 1914) is
// missing from this extract — verify against upstream.
1915 << " ... found preceeding def " << *UseInst << "\n");
1916 return std::nullopt;
1917 }
1918 } else
1919 pushMemUses(UseDef, WorkList, Visited);
1920 }
1921 }
1922
1923 // For accesses to locations visible after the function returns, make sure
1924 // that the location is dead (=overwritten) along all paths from
1925 // MaybeDeadAccess to the exit.
1926 if (!isInvisibleToCallerAfterRet(KillingUndObj, KillingLoc.Ptr,
1927 KillingLoc.Size)) {
1928 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1929 for (Instruction *KD : KillingDefs)
1930 KillingBlocks.insert(KD->getParent());
1931 assert(!KillingBlocks.empty() &&
1932 "Expected at least a single killing block");
1933
1934 // Find the common post-dominator of all killing blocks.
1935 BasicBlock *CommonPred = *KillingBlocks.begin();
1936 for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1937 if (!CommonPred)
1938 break;
1939 CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1940 }
1941
1942 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1943 // there is a path from MaybeDeadAccess to an exit not going through a
1944 // killing block.
1945 if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1946 if (!AnyUnreachableExit)
1947 return std::nullopt;
1948
1949 // Fall back to CFG scan starting at all non-unreachable roots if not
1950 // all paths to the exit go through CommonPred.
1951 CommonPred = nullptr;
1952 }
1953
1954 // If CommonPred itself is in the set of killing blocks, we're done.
1955 if (KillingBlocks.count(CommonPred))
1956 return {MaybeDeadAccess};
1957
1958 SetVector<BasicBlock *> WorkList;
1959 // If CommonPred is null, there are multiple exits from the function.
1960 // They all have to be added to the worklist.
1961 if (CommonPred)
1962 WorkList.insert(CommonPred);
1963 else
1964 for (BasicBlock *R : PDT.roots()) {
1965 if (!isa<UnreachableInst>(R->getTerminator()))
1966 WorkList.insert(R);
1967 }
1968
1969 NumCFGTries++;
1970 // Check if all paths starting from an exit node go through one of the
1971 // killing blocks before reaching MaybeDeadAccess.
1972 for (unsigned I = 0; I < WorkList.size(); I++) {
1973 NumCFGChecks++;
1974 BasicBlock *Current = WorkList[I];
1975 if (KillingBlocks.count(Current))
1976 continue;
1977 if (Current == MaybeDeadAccess->getBlock())
1978 return std::nullopt;
1979
1980 // MaybeDeadAccess is reachable from the entry, so we don't have to
1981 // explore unreachable blocks further.
1982 if (!DT.isReachableFromEntry(Current))
1983 continue;
1984
1985 WorkList.insert_range(predecessors(Current));
1986
// Give up once the backwards CFG scan grows past the configured limit.
1987 if (WorkList.size() >= MemorySSAPathCheckLimit)
1988 return std::nullopt;
1989 }
1990 NumCFGSuccess++;
1991 }
1992
1993 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1994 // potentially dead.
1995 return {MaybeDeadAccess};
1996}
1997
// Delete SI and, transitively, any operand instructions that become
// trivially dead, keeping MemorySSA and the DSE side tables consistent.
// When `Deleted` is non-null, every removed MemoryDef is recorded in it so
// callers can avoid touching stale MemoryAccess pointers.
1998 void DSEState::deleteDeadInstruction(Instruction *SI,
1999 SmallPtrSetImpl<MemoryAccess *> *Deleted) {
2000 MemorySSAUpdater Updater(&MSSA);
2001 SmallVector<Instruction *, 32> NowDeadInsts;
2002 NowDeadInsts.push_back(SI);
// Pre-decrement so the loop's unconditional ++NumFastOther does not count
// SI itself; only the cascaded operand deletions count as "other".
2003 --NumFastOther;
2004
2005 while (!NowDeadInsts.empty()) {
2006 Instruction *DeadInst = NowDeadInsts.pop_back_val();
2007 ++NumFastOther;
2008
2009 // Try to preserve debug information attached to the dead instruction.
2010 salvageDebugInfo(*DeadInst);
2011 salvageKnowledge(DeadInst);
2012
2013 // Remove the Instruction from MSSA.
2014 MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
2015 bool IsMemDef = MA && isa<MemoryDef>(MA);
2016 if (MA) {
2017 if (IsMemDef) {
2018 auto *MD = cast<MemoryDef>(MA);
// Mark the def so later traversals skip it, and report it to the caller.
2019 SkipStores.insert(MD);
2020 if (Deleted)
2021 Deleted->insert(MD);
2022 if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
2023 if (SI->getValueOperand()->getType()->isPointerTy()) {
// Deleting a store of a pointer may make its underlying object
// invisible after return again; invalidate the cached answers and
// re-run the end-of-function scan if the capture entry existed.
2024 const Value *UO = getUnderlyingObject(SI->getValueOperand());
2025 if (CapturedBeforeReturn.erase(UO))
2026 ShouldIterateEndOfFunctionDSE = true;
2027 InvisibleToCallerAfterRet.erase(UO);
2028 InvisibleToCallerAfterRetBounded.erase(UO);
2029 }
2030 }
2031 }
2032
2033 Updater.removeMemoryAccess(MA);
2034 }
2035
// Drop any recorded overlap intervals for the deleted instruction.
2036 auto I = IOLs.find(DeadInst->getParent());
2037 if (I != IOLs.end())
2038 I->second.erase(DeadInst);
2039 // Remove its operands
2040 for (Use &O : DeadInst->operands())
2041 if (Instruction *OpI = dyn_cast<Instruction>(O)) {
2042 O.set(PoisonValue::get(O->getType()));
2043 if (isInstructionTriviallyDead(OpI, &TLI))
2044 NowDeadInsts.push_back(OpI);
2045 }
2046
2047 EA.removeInstruction(DeadInst);
2048 // Remove memory defs directly if they don't produce results, but only
2049 // queue other dead instructions for later removal. They may have been
2050 // used as memory locations that have been cached by BatchAA. Removing
2051 // them here may lead to newly created instructions to be allocated at the
2052 // same address, yielding stale cache entries.
2053 if (IsMemDef && DeadInst->getType()->isVoidTy())
2054 DeadInst->eraseFromParent();
2055 else
2056 ToRemove.push_back(DeadInst);
2057 }
2058}
2059
2060bool DSEState::mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
2061 const Value *KillingUndObj) {
2062 // First see if we can ignore it by using the fact that KillingI is an
2063 // alloca/alloca like object that is not visible to the caller during
2064 // execution of the function.
2065 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
2066 return false;
2067
2068 if (KillingI->getParent() == DeadI->getParent())
2069 return ThrowingBlocks.count(KillingI->getParent());
2070 return !ThrowingBlocks.empty();
2071}
2072
2073bool DSEState::isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
2074 // If DeadI may throw it acts as a barrier, unless we are to an
2075 // alloca/alloca like object that does not escape.
2076 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
2077 return true;
2078
2079 // If DeadI is an atomic load/store stronger than monotonic, do not try to
2080 // eliminate/reorder it.
2081 if (DeadI->isAtomic()) {
2082 if (auto *LI = dyn_cast<LoadInst>(DeadI))
2083 return isStrongerThanMonotonic(LI->getOrdering());
2084 if (auto *SI = dyn_cast<StoreInst>(DeadI))
2085 return isStrongerThanMonotonic(SI->getOrdering());
2086 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
2087 return isStrongerThanMonotonic(ARMW->getOrdering());
2088 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
2089 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
2090 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
2091 llvm_unreachable("other instructions should be skipped in MemorySSA");
2092 }
2093 return false;
2094}
2095
// Remove MemoryDefs that write to locations which are provably never read
// again before the function returns. Iterates to a fixed point because
// deleting one store can make another object invisible after return.
2096 bool DSEState::eliminateDeadWritesAtEndOfFunction() {
2097 bool MadeChange = false;
2098 LLVM_DEBUG(
2099 dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n");
2100 do {
2101 ShouldIterateEndOfFunctionDSE = false;
// Walk defs bottom-up so later stores are considered first.
2102 for (MemoryDef *Def : llvm::reverse(MemDefs)) {
2103 if (SkipStores.contains(Def))
2104 continue;
2105
2106 Instruction *DefI = Def->getMemoryInst();
2107 auto DefLoc = getLocForWrite(DefI);
2108 if (!DefLoc || !isRemovable(DefI)) {
2109 LLVM_DEBUG(dbgs() << " ... could not get location for write or "
2110 "instruction not removable.\n");
2111 continue;
2112 }
2113
2114 // NOTE: Currently eliminating writes at the end of a function is
2115 // limited to MemoryDefs with a single underlying object, to save
2116 // compile-time. In practice it appears the case with multiple
2117 // underlying objects is very uncommon. If it turns out to be important,
2118 // we can use getUnderlyingObjects here instead.
2119 const Value *UO = getUnderlyingObject(DefLoc->Ptr);
2120 if (!isInvisibleToCallerAfterRet(UO, DefLoc->Ptr, DefLoc->Size))
2121 continue;
2122
2123 if (isWriteAtEndOfFunction(Def, *DefLoc)) {
2124 // See through pointer-to-pointer bitcasts
2125 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
2126 "of the function\n");
// NOTE(review): original line 2127 is missing from this extract —
// presumably the deleteDeadInstruction(DefI) call that performs the
// actual removal (the statistics update below implies it). Verify
// against upstream.
2128 ++NumFastStores;
2129 MadeChange = true;
2130 }
2131 }
2132 } while (ShouldIterateEndOfFunctionDSE);
2133 return MadeChange;
2134}
2135
// Try to fold "p = malloc(n); memset(p, 0, n);" into a single zeroed
// allocation — either calloc(1, n) or, for allocators that advertise an
// "alloc-variant-zeroed" function attribute, the named zeroed variant.
// Returns true if the rewrite happened, making the memset (Def) dead.
2136 bool DSEState::tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
2137 Instruction *DefI = Def->getMemoryInst();
2138 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
2139 if (!MemSet)
2140 // TODO: Could handle zero store to small allocation as well.
2141 return false;
2142 Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2143 if (!StoredConstant || !StoredConstant->isNullValue())
2144 return false;
2145
2146 if (!isRemovable(DefI))
2147 // The memset might be volatile..
2148 return false;
2149
// Sanitizers instrument allocation and memory writes; presumably folding
// would hide the write from them, hence it is disabled. Also never rewrite
// inside calloc itself to avoid self-recursion.
2150 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
2151 F.hasFnAttribute(Attribute::SanitizeAddress) ||
2152 F.hasFnAttribute(Attribute::SanitizeHWAddress) || F.getName() == "calloc")
2153 return false;
2154 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
2155 if (!Malloc)
2156 return false;
2157 auto *InnerCallee = Malloc->getCalledFunction();
2158 if (!InnerCallee)
2159 return false;
// Accept either a recognized malloc, or any call carrying an
// "alloc-variant-zeroed" attribute that names its zero-initializing twin.
2160 LibFunc Func = NotLibFunc;
2161 StringRef ZeroedVariantName;
2162 if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
2163 Func != LibFunc_malloc) {
2164 Attribute Attr = Malloc->getFnAttr("alloc-variant-zeroed");
2165 if (!Attr.isValid())
2166 return false;
2167 ZeroedVariantName = Attr.getValueAsString();
2168 if (ZeroedVariantName.empty())
2169 return false;
2170 }
2171
2172 // Gracefully handle malloc with unexpected memory attributes.
2173 auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
2174 if (!MallocDef)
2175 return false;
2176
2177 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
2178 // Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
2179 // of malloc block
2180 auto *MallocBB = Malloc->getParent(), *MemsetBB = Memset->getParent();
2181 if (MallocBB == MemsetBB)
2182 return true;
2183 auto *Ptr = Memset->getArgOperand(0);
2184 auto *TI = MallocBB->getTerminator();
2185 BasicBlock *TrueBB, *FalseBB;
2186 if (!match(TI, m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(Ptr),
2187 m_Zero()),
2188 TrueBB, FalseBB)))
2189 return false;
// The memset must sit on the "pointer != null" edge of the null check.
2190 if (MemsetBB != FalseBB)
2191 return false;
2192 return true;
2193 };
2194
// The memset must zero exactly the allocated size, the allocation must
// dominate it, and nothing may write the memory in between.
2195 if (Malloc->getOperand(0) != MemSet->getLength())
2196 return false;
2197 if (!shouldCreateCalloc(Malloc, MemSet) || !DT.dominates(Malloc, MemSet) ||
2198 !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
2199 return false;
2200 IRBuilder<> IRB(Malloc);
2201 assert(Func == LibFunc_malloc || !ZeroedVariantName.empty());
2202 Value *Calloc = nullptr;
2203 if (!ZeroedVariantName.empty()) {
// Build a call to the declared zeroed variant, carrying over the original
// allocator's attributes with AllocKind updated to Zeroed.
2204 LLVMContext &Ctx = Malloc->getContext();
2205 AttributeList Attrs = InnerCallee->getAttributes();
2206 AllocFnKind AllocKind =
2207 Attrs.getFnAttr(Attribute::AllocKind).getAllocKind() |
2208 AllocFnKind::Zeroed;
2209 AllocKind &= ~AllocFnKind::Uninitialized;
2210 Attrs =
2211 Attrs.addFnAttribute(Ctx, Attribute::getWithAllocKind(Ctx, AllocKind))
2212 .removeFnAttribute(Ctx, "alloc-variant-zeroed");
2213 FunctionCallee ZeroedVariant = Malloc->getModule()->getOrInsertFunction(
2214 ZeroedVariantName, InnerCallee->getFunctionType(), Attrs);
2215 cast<Function>(ZeroedVariant.getCallee())
2216 ->setCallingConv(Malloc->getCallingConv());
// NOTE(review): original line 2217 is missing from this extract —
// presumably the declaration of the Args vector appended to below.
// Verify against upstream.
2218 Args.append(Malloc->arg_begin(), Malloc->arg_end());
2219 CallInst *CI = IRB.CreateCall(ZeroedVariant, Args, ZeroedVariantName);
2220 CI->setCallingConv(Malloc->getCallingConv());
2221 Calloc = CI;
2222 } else {
2223 Type *SizeTTy = Malloc->getArgOperand(0)->getType();
2224 Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1), Malloc->getArgOperand(0),
2225 IRB, TLI, Malloc->getType()->getPointerAddressSpace());
2226 }
2227 if (!Calloc)
2228 return false;
2229
// Splice the new zeroed allocation into MemorySSA right after the old
// malloc's def, then redirect all users to it.
2230 MemorySSAUpdater Updater(&MSSA);
2231 auto *NewAccess = Updater.createMemoryAccessAfter(cast<Instruction>(Calloc),
2232 nullptr, MallocDef);
2233 auto *NewAccessMD = cast<MemoryDef>(NewAccess);
2234 Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
2235 Malloc->replaceAllUsesWith(Calloc);
// NOTE(review): original line 2236 is missing from this extract —
// presumably the deletion of the now-unused Malloc call. Verify against
// upstream.
2237 return true;
2238}
2239
2240bool DSEState::dominatingConditionImpliesValue(MemoryDef *Def) {
2241 auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
2242 BasicBlock *StoreBB = StoreI->getParent();
2243 Value *StorePtr = StoreI->getPointerOperand();
2244 Value *StoreVal = StoreI->getValueOperand();
2245
2246 DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
2247 if (!IDom)
2248 return false;
2249
2250 auto *BI = dyn_cast<CondBrInst>(IDom->getBlock()->getTerminator());
2251 if (!BI)
2252 return false;
2253
2254 // In case both blocks are the same, it is not possible to determine
2255 // if optimization is possible. (We would not want to optimize a store
2256 // in the FalseBB if condition is true and vice versa.)
2257 if (BI->getSuccessor(0) == BI->getSuccessor(1))
2258 return false;
2259
2260 Instruction *ICmpL;
2261 CmpPredicate Pred;
2262 if (!match(BI->getCondition(),
2263 m_c_ICmp(Pred,
2264 m_CombineAnd(m_Load(m_Specific(StorePtr)),
2265 m_Instruction(ICmpL)),
2266 m_Specific(StoreVal))) ||
2267 !ICmpInst::isEquality(Pred))
2268 return false;
2269
2270 // Ensure the replacement is allowed when comparing pointers, as
2271 // the equality compares addresses only, not pointers' provenance.
2272 if (StoreVal->getType()->isPointerTy() &&
2273 !canReplacePointersIfEqual(StoreVal, ICmpL, DL))
2274 return false;
2275
2276 // In case the else blocks also branches to the if block or the other way
2277 // around it is not possible to determine if the optimization is possible.
2278 if (Pred == ICmpInst::ICMP_EQ &&
2279 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
2280 StoreBB))
2281 return false;
2282
2283 if (Pred == ICmpInst::ICMP_NE &&
2284 !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
2285 StoreBB))
2286 return false;
2287
2288 MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
2289 MemoryAccess *ClobAcc =
2290 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
2291
2292 return MSSA.dominates(ClobAcc, LoadAcc);
2293}
2294
// Returns true if the store/memset performed by \p Def writes a value the
// destination (underlying object \p DefUO) is already known to hold, so the
// write can be removed as a no-op. Three cases are recognized: storing the
// allocation's known initial value with no intervening clobber, a value
// implied by a dominating condition, and storing back a just-loaded value.
2295 bool DSEState::storeIsNoop(MemoryDef *Def, const Value *DefUO) {
2296 Instruction *DefI = Def->getMemoryInst();
2297 StoreInst *Store = dyn_cast<StoreInst>(DefI);
2298 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
2299 Constant *StoredConstant = nullptr;
2300 if (Store)
2301 StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
2302 else if (MemSet)
2303 StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2304 else
2305 return false;
2306
2307 if (!isRemovable(DefI))
2308 return false;
2309
// Case 1: the stored constant equals the allocation's initial value (e.g.
// zero from calloc) and nothing clobbered the memory since the allocation.
2310 if (StoredConstant) {
2311 Constant *InitC =
2312 getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
2313 // If the clobbering access is LiveOnEntry, no instructions between them
2314 // can modify the memory location.
2315 if (InitC && InitC == StoredConstant)
2316 return MSSA.isLiveOnEntryDef(
2317 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
2318 }
2319
// The remaining cases only apply to plain stores.
2320 if (!Store)
2321 return false;
2322
// Case 2: a dominating conditional branch proves the location already
// contains the stored value.
2323 if (dominatingConditionImpliesValue(Def))
2324 return true;
2325
// Case 3: storing back a value just loaded from the same pointer, with no
// clobber of that location in between.
2326 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
2327 if (LoadI->getPointerOperand() == Store->getOperand(1)) {
2328 // Get the defining access for the load.
2329 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
2330 // Fast path: the defining accesses are the same.
2331 if (LoadAccess == Def->getDefiningAccess())
2332 return true;
2333
2334 // Look through phi accesses. Recursively scan all phi accesses by
2335 // adding them to a worklist. Bail when we run into a memory def that
2336 // does not match LoadAccess.
2337 SetVector<MemoryAccess *> ToCheck;
2338 MemoryAccess *Current =
2339 MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
2340 // We don't want to bail when we run into the store memory def. But,
2341 // the phi access may point to it. So, pretend like we've already
2342 // checked it.
2343 ToCheck.insert(Def);
2344 ToCheck.insert(Current);
2345 // Start at current (1) to simulate already having checked Def.
2346 for (unsigned I = 1; I < ToCheck.size(); ++I) {
2347 Current = ToCheck[I];
2348 if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
2349 // Check all the operands.
2350 for (auto &Use : PhiAccess->incoming_values())
2351 ToCheck.insert(cast<MemoryAccess>(&Use));
2352 continue;
2353 }
2354
2355 // If we found a memory def, bail. This happens when we have an
2356 // unrelated write in between an otherwise noop store.
2357 assert(isa<MemoryDef>(Current) && "Only MemoryDefs should reach here.");
2358 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
2359 // We are searching for the definition of the store's destination.
2360 // So, if that is the same definition as the load, then this is a
2361 // noop. Otherwise, fail.
2362 if (LoadAccess != Current)
2363 return false;
2364 }
// Every path reached LoadAccess (or the store itself): the store is a noop.
2365 return true;
2366 }
2367 }
2368
2369 return false;
2370}
2371
2372bool DSEState::removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2373 bool Changed = false;
2374 for (auto OI : IOL) {
2375 Instruction *DeadI = OI.first;
2376 MemoryLocation Loc = *getLocForWrite(DeadI);
2377 assert(isRemovable(DeadI) && "Expect only removable instruction");
2378
2379 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2380 int64_t DeadStart = 0;
2381 uint64_t DeadSize = Loc.Size.getValue();
2382 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
2383 OverlapIntervalsTy &IntervalMap = OI.second;
2384 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2385 if (IntervalMap.empty())
2386 continue;
2387 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2388 }
2389 return Changed;
2390}
2391
2392bool DSEState::eliminateRedundantStoresOfExistingValues() {
2393 bool MadeChange = false;
2394 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2395 "already existing value\n");
2396 for (auto *Def : MemDefs) {
2397 if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
2398 continue;
2399
2400 Instruction *DefInst = Def->getMemoryInst();
2401 auto MaybeDefLoc = getLocForWrite(DefInst);
2402 if (!MaybeDefLoc || !isRemovable(DefInst))
2403 continue;
2404
2405 MemoryDef *UpperDef;
2406 // To conserve compile-time, we avoid walking to the next clobbering def.
2407 // Instead, we just try to get the optimized access, if it exists. DSE
2408 // will try to optimize defs during the earlier traversal.
2409 if (Def->isOptimized())
2410 UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
2411 else
2412 UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2413 if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2414 continue;
2415
2416 Instruction *UpperInst = UpperDef->getMemoryInst();
2417 auto IsRedundantStore = [&]() {
2418 // We don't care about differences in call attributes here.
2419 if (DefInst->isIdenticalToWhenDefined(UpperInst,
2420 /*IntersectAttrs=*/true))
2421 return true;
2422 if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2423 if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2424 // MemSetInst must have a write location.
2425 auto UpperLoc = getLocForWrite(UpperInst);
2426 if (!UpperLoc)
2427 return false;
2428 int64_t InstWriteOffset = 0;
2429 int64_t DepWriteOffset = 0;
2430 auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
2431 InstWriteOffset, DepWriteOffset);
2432 Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2433 return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2434 OR == OW_Complete;
2435 }
2436 }
2437 return false;
2438 };
2439
2440 if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2441 continue;
2442 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2443 << '\n');
2444 deleteDeadInstruction(DefInst);
2445 NumRedundantStores++;
2446 MadeChange = true;
2447 }
2448 return MadeChange;
2449}
2450
// For a call with "initializes" parameter attributes, compute the memory
// locations (starting at offset 0) that the callee is guaranteed to
// initialize, so those writes can kill earlier stores. Aliasing arguments
// are grouped and their init ranges intersected, since an aliasing pair only
// guarantees what all members initialize.
// NOTE(review): the function's return-type line (original line 2451,
// presumably a SmallVector of MemoryLocation) is missing from this extract —
// verify against upstream.
2452 DSEState::getInitializesArgMemLoc(const Instruction *I) {
2453 const CallBase *CB = dyn_cast<CallBase>(I);
2454 if (!CB)
2455 return {};
2456
2457 // Collect aliasing arguments and their initializes ranges.
2458 SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
2459 for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
2460 Value *CurArg = CB->getArgOperand(Idx);
2461 if (!CurArg->getType()->isPointerTy())
2462 continue;
2463
2464 ConstantRangeList Inits;
2465 Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
2466 // initializes on byval arguments refers to the callee copy, not the
2467 // original memory the caller passed in.
2468 if (InitializesAttr.isValid() && !CB->isByValArgument(Idx))
2469 Inits = InitializesAttr.getValueAsConstantRangeList();
2470
2471 // Check whether "CurArg" could alias with global variables. We require
2472 // either it's function local and isn't captured before or the "CB" only
2473 // accesses arg or inaccessible mem.
2474 if (!Inits.empty() && !CB->onlyAccessesInaccessibleMemOrArgMem() &&
2475 !isFuncLocalAndNotCaptured(CurArg, CB, EA))
2476 Inits = ConstantRangeList();
2477
2478 // We don't perform incorrect DSE on unwind edges in the current function,
2479 // and use the "initializes" attribute to kill dead stores if:
2480 // - The call does not throw exceptions, "CB->doesNotThrow()".
2481 // - Or the callee parameter has "dead_on_unwind" attribute.
2482 // - Or the argument is invisible to caller on unwind, and there are no
2483 // unwind edges from this call in the current function (e.g. `CallInst`).
2484 bool IsDeadOrInvisibleOnUnwind =
2485 CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
2486 (isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg));
2487 ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits};
2488 bool FoundAliasing = false;
2489 for (auto &[Arg, AliasList] : Arguments) {
2490 auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
// NOTE(review): the second argument of this alias query (original line
// 2491, presumably MemoryLocation::getBeforeOrAfter(CurArg)) is missing
// from this extract — verify against upstream.
2492 if (AAR == AliasResult::NoAlias) {
2493 continue;
2494 } else if (AAR == AliasResult::MustAlias) {
2495 FoundAliasing = true;
2496 AliasList.push_back(InitInfo);
2497 } else {
2498 // For PartialAlias and MayAlias, there is an offset or may be an
2499 // unknown offset between the arguments and we insert an empty init
2500 // range to discard the entire initializes info while intersecting.
2501 FoundAliasing = true;
2502 AliasList.push_back(ArgumentInitInfo{Idx, IsDeadOrInvisibleOnUnwind,
2503 ConstantRangeList()});
2504 }
2505 }
2506 if (!FoundAliasing)
2507 Arguments[CurArg] = {InitInfo};
2508 }
2509
// NOTE(review): the declaration of the Locations result vector (original
// line 2510) is missing from this extract — verify against upstream.
2511 for (const auto &[_, Args] : Arguments) {
2512 auto IntersectedRanges =
// NOTE(review): the initializer expression of IntersectedRanges (original
// line 2513, presumably the helper intersecting the Args init ranges) is
// missing from this extract — verify against upstream.
2514 if (IntersectedRanges.empty())
2515 continue;
2516
2517 for (const auto &Arg : Args) {
2518 for (const auto &Range : IntersectedRanges) {
2519 int64_t Start = Range.getLower().getSExtValue();
2520 int64_t End = Range.getUpper().getSExtValue();
2521 // For now, we only handle locations starting at offset 0.
2522 if (Start == 0)
2523 Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
2524 LocationSize::precise(End - Start),
2525 CB->getAAMetadata()));
2526 }
2527 }
2528 }
2529 return Locations;
2530}
2531
/// Try to eliminate MemoryDefs that are killed by
/// "KillingLocWrapper.MemDef" writing to "KillingLocWrapper.MemLoc".
/// Walks upwards in MemorySSA from the killing def, collecting candidate
/// dead accesses in a worklist, and deletes (or partially merges) the ones
/// that are provably overwritten.
///
/// \returns a pair of booleans: the first is true if any change was made;
/// the second is true if the killing store itself was deleted (this only
/// happens on the partial-store-merging path below).
std::pair<bool, bool>
DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
  bool Changed = false;
  bool DeletedKillingLoc = false;
  // Budgets bounding the MemorySSA walk; taken from the corresponding
  // cl::opt limits (scan, upwards-step and partial-store limits).
  unsigned ScanLimit = MemorySSAScanLimit;
  unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
  unsigned PartialLimit = MemorySSAPartialStoreLimit;
  // Worklist of MemoryAccesses that may be killed by
  // "KillingLocWrapper.MemDef".
  SmallSetVector<MemoryAccess *, 8> ToCheck;
  // Track MemoryAccesses that have been deleted in the loop below, so we can
  // skip them. Don't use SkipStores for this, which may contain reused
  // MemoryAccess addresses.
  SmallPtrSet<MemoryAccess *, 8> Deleted;
  [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
  ToCheck.insert(KillingLocWrapper.MemDef->getDefiningAccess());

  // Check if MemoryAccesses in the worklist are killed by
  // "KillingLocWrapper.MemDef". Note: the worklist may grow while iterating.
  for (unsigned I = 0; I < ToCheck.size(); I++) {
    MemoryAccess *Current = ToCheck[I];
    if (Deleted.contains(Current))
      continue;
    // Walk upwards from "Current" to the next dominating access (MemoryDef
    // or MemoryPhi) that might be killed by the killing def.
    std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
        KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
        KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
        isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
        KillingLocWrapper.DefByInitializesAttr);

    if (!MaybeDeadAccess) {
      LLVM_DEBUG(dbgs() << "  finished walk\n");
      continue;
    }
    MemoryAccess *DeadAccess = *MaybeDeadAccess;
    LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
    if (isa<MemoryPhi>(DeadAccess)) {
      LLVM_DEBUG(dbgs() << "\n  ... adding incoming values to worklist\n");
      for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
        MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
        BasicBlock *IncomingBlock = IncomingAccess->getBlock();
        BasicBlock *PhiBlock = DeadAccess->getBlock();

        // We only consider incoming MemoryAccesses that come before the
        // MemoryPhi. Otherwise we could discover candidates that do not
        // strictly dominate our starting def.
        if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
          ToCheck.insert(IncomingAccess);
      }
      continue;
    }
    // We cannot apply the initializes attribute to DeadAccess/DeadDef.
    // It would incorrectly consider a call instruction as redundant store
    // and remove this call instruction.
    // TODO: this conflates the existence of a MemoryLocation with being able
    // to delete the instruction. Fix isRemovable() to consider calls with
    // side effects that cannot be removed, e.g. calls with the initializes
    // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
    MemoryDefWrapper DeadDefWrapper(
        cast<MemoryDef>(DeadAccess),
        getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
                      /*ConsiderInitializesAttr=*/false));
    assert(DeadDefWrapper.DefinedLocations.size() == 1);
    MemoryLocationWrapper &DeadLocWrapper =
        DeadDefWrapper.DefinedLocations.front();
    LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
    // Keep walking upwards past the dead def, so further candidates above it
    // can also be examined.
    ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
    NumGetDomMemoryDefPassed++;

    // Debug counter allows bisecting miscompiles by selectively disabling
    // individual eliminations.
    if (!DebugCounter::shouldExecute(MemorySSACounter))
      continue;
    if (isMemTerminatorInst(KillingLocWrapper.DefInst)) {
      // A memory terminator kills stores only when they write to the same
      // underlying object.
      if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
        continue;
      LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                        << *DeadLocWrapper.DefInst << "\n  KILLER: "
                        << *KillingLocWrapper.DefInst << '\n');
      deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
      ++NumFastStores;
      Changed = true;
    } else {
      // Check if DeadI overwrites KillingI.
      int64_t KillingOffset = 0;
      int64_t DeadOffset = 0;
      OverwriteResult OR =
          isOverwrite(KillingLocWrapper.DefInst, DeadLocWrapper.DefInst,
                      KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
                      KillingOffset, DeadOffset);
      if (OR == OW_MaybePartial) {
        // Record the overlap interval and re-classify: repeated partial
        // overwrites of the same dead store may add up to a complete one.
        auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
        OR = isPartialOverwrite(KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
                                KillingOffset, DeadOffset,
                                DeadLocWrapper.DefInst, IOL);
      }
      if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
        auto *DeadSI = dyn_cast<StoreInst>(DeadLocWrapper.DefInst);
        auto *KillingSI = dyn_cast<StoreInst>(KillingLocWrapper.DefInst);
        // We are re-using tryToMergePartialOverlappingStores, which requires
        // DeadSI to dominate KillingSI.
        // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
        if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
          if (Constant *Merged = tryToMergePartialOverlappingStores(
                  KillingSI, DeadSI, KillingOffset, DeadOffset, DL, BatchAA,
                  &DT)) {

            // Update stored value of earlier store to merged constant.
            DeadSI->setOperand(0, Merged);
            ++NumModifiedStores;
            Changed = true;
            DeletedKillingLoc = true;

            // Remove killing store and remove any outstanding overlap
            // intervals for the updated store.
            deleteDeadInstruction(KillingSI, &Deleted);
            auto I = IOLs.find(DeadSI->getParent());
            if (I != IOLs.end())
              I->second.erase(DeadSI);
            // The killing store is gone; nothing further can be killed by it.
            break;
          }
        }
      }
      if (OR == OW_Complete) {
        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                          << *DeadLocWrapper.DefInst << "\n  KILLER: "
                          << *KillingLocWrapper.DefInst << '\n');
        deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
        ++NumFastStores;
        Changed = true;
      }
    }
  }

  // deleteDeadInstruction adds every deletion to SkipStores as well, so the
  // two sets must have grown in lock-step.
  assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
         "SkipStores and Deleted out of sync?");

  return {Changed, DeletedKillingLoc};
}
2668
2669bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
2670 if (KillingDefWrapper.DefinedLocations.empty()) {
2671 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2672 << *KillingDefWrapper.DefInst << "\n");
2673 return false;
2674 }
2675
2676 bool MadeChange = false;
2677 for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
2678 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2679 << *KillingLocWrapper.MemDef << " ("
2680 << *KillingLocWrapper.DefInst << ")\n");
2681 auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
2682 MadeChange |= Changed;
2683
2684 // Check if the store is a no-op.
2685 if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
2686 KillingLocWrapper.UnderlyingObject)) {
2687 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
2688 << *KillingLocWrapper.DefInst << '\n');
2689 deleteDeadInstruction(KillingLocWrapper.DefInst);
2690 NumRedundantStores++;
2691 MadeChange = true;
2692 continue;
2693 }
2694 // Can we form a calloc from a memset/malloc pair?
2695 if (!DeletedKillingLoc &&
2696 tryFoldIntoCalloc(KillingLocWrapper.MemDef,
2697 KillingLocWrapper.UnderlyingObject)) {
2698 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2699 << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
2700 deleteDeadInstruction(KillingLocWrapper.DefInst);
2701 MadeChange = true;
2702 continue;
2703 }
2704 }
2705 return MadeChange;
2706}
2707
2710 const TargetLibraryInfo &TLI,
2711 const CycleInfo &CI) {
2712 bool MadeChange = false;
2713 DSEState State(F, AA, MSSA, DT, PDT, TLI, CI);
2714 // For each store:
2715 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2716 MemoryDef *KillingDef = State.MemDefs[I];
2717 if (State.SkipStores.count(KillingDef))
2718 continue;
2719
2720 MemoryDefWrapper KillingDefWrapper(
2721 KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
2723 MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
2724 }
2725
2727 for (auto &KV : State.IOLs)
2728 MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2729
2730 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2731 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2732
2733 while (!State.ToRemove.empty()) {
2734 Instruction *DeadInst = State.ToRemove.pop_back_val();
2735 DeadInst->eraseFromParent();
2736 }
2737
2738 return MadeChange;
2739}
2740
2741//===----------------------------------------------------------------------===//
2742// DSE Pass
2743//===----------------------------------------------------------------------===//
2748 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2751
2752 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, CI);
2753
2754#ifdef LLVM_ENABLE_STATS
2756 for (auto &I : instructions(F))
2757 NumRemainingStores += isa<StoreInst>(&I);
2758#endif
2759
2760 if (!Changed)
2761 return PreservedAnalyses::all();
2762
2766 return PA;
2767}
2768
2769namespace {
2770
2771/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2772class DSELegacyPass : public FunctionPass {
2773public:
2774 static char ID; // Pass identification, replacement for typeid
2775
2776 DSELegacyPass() : FunctionPass(ID) {
2778 }
2779
2780 bool runOnFunction(Function &F) override {
2781 if (skipFunction(F))
2782 return false;
2783
2784 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2785 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2786 const TargetLibraryInfo &TLI =
2787 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2788 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2789 PostDominatorTree &PDT =
2790 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2791 CycleInfo &CI = getAnalysis<CycleInfoWrapperPass>().getResult();
2792
2793 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, CI);
2794
2795#ifdef LLVM_ENABLE_STATS
2797 for (auto &I : instructions(F))
2798 NumRemainingStores += isa<StoreInst>(&I);
2799#endif
2800
2801 return Changed;
2802 }
2803
2804 void getAnalysisUsage(AnalysisUsage &AU) const override {
2805 AU.setPreservesCFG();
2806 AU.addRequired<AAResultsWrapperPass>();
2807 AU.addRequired<TargetLibraryInfoWrapperPass>();
2808 AU.addPreserved<GlobalsAAWrapperPass>();
2809 AU.addRequired<DominatorTreeWrapperPass>();
2810 AU.addPreserved<DominatorTreeWrapperPass>();
2811 AU.addRequired<PostDominatorTreeWrapperPass>();
2812 AU.addRequired<MemorySSAWrapperPass>();
2813 AU.addPreserved<PostDominatorTreeWrapperPass>();
2814 AU.addPreserved<MemorySSAWrapperPass>();
2815 AU.addRequired<CycleInfoWrapperPass>();
2816 AU.addPreserved<CycleInfoWrapperPass>();
2817 AU.addRequired<AssumptionCacheTracker>();
2818 }
2819};
2820
2821} // end anonymous namespace
2822
2823char DSELegacyPass::ID = 0;
2824
2825INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2826 false)
2836INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2837 false)
2838
2840 return new DSELegacyPass();
2841}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file declares an analysis pass that computes CycleInfo for LLVM IR, specialized from GenericCycl...
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI, const CycleInfo &CI)
MapVector< Instruction *, OverlapIntervalsTy > InstOverlapIntervalsTy
static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller)
static cl::opt< bool > EnableInitializesImprovement("enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden, cl::desc("Enable the initializes attr improvement in DSE"))
static void shortenAssignment(Instruction *Inst, Value *OriginalDest, uint64_t OldOffsetInBits, uint64_t OldSizeInBits, uint64_t NewSizeInBits, bool IsOverwriteEnd)
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
static bool isNoopIntrinsic(Instruction *I)
static ConstantRangeList getIntersectedInitRangeList(ArrayRef< ArgumentInitInfo > Args, bool CallHasNoUnwindAttr)
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
std::map< int64_t, int64_t > OverlapIntervalsTy
static void pushMemUses(MemoryAccess *Acc, SmallVectorImpl< MemoryAccess * > &WorkList, SmallPtrSetImpl< MemoryAccess * > &Visited)
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)"))
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB, EarliestEscapeAnalysis &EA)
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
static std::optional< TypeSize > getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo, uint64_t PtrOffset)
Update the attributes given that a memory access is updated (the dereferenced pointer could be moved ...
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))
static bool hasInitializesAttr(Instruction *I)
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
early cse Early CSE w MemorySSA
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static void deleteDeadInstruction(Instruction *I)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1043
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
@ NoAlias
The two locations do not alias at all.
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
constexpr int32_t getOffset() const
constexpr bool hasOffset() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
An immutable pass that tracks lazily created AssumptionCache objects.
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:407
LLVM_ABI ArrayRef< ConstantRange > getValueAsConstantRangeList() const
Return the attribute's value as a ConstantRange array.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Get the attribute of a given kind from a given arg.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
LLVM_ABI bool onlyAccessesInaccessibleMemOrArgMem() const
Determine if the function may only access memory that is either inaccessible from the IR or pointed t...
bool doesNotThrow() const
Determine if the call cannot unwind.
Value * getArgOperand(unsigned i) const
LLVM_ABI Value * getArgOperandWithAttribute(Attribute::AttrKind Kind) const
If one of the arguments has the specified attribute, returns its operand value.
unsigned arg_size() const
This class represents a list of constant ranges.
bool empty() const
Return true if this list contains no members.
LLVM_ABI ConstantRangeList intersectWith(const ConstantRangeList &CRL) const
Return the range list that results from the intersection of this ConstantRangeList with another Const...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
Analysis pass which computes a CycleInfo.
Legacy analysis pass which computes a CycleInfo.
static DIAssignID * getDistinct(LLVMContext &Context)
DbgVariableFragmentInfo FragmentInfo
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(CounterInfo &Counter)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
iterator_range< root_iterator > roots()
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:316
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
void removeInstruction(Instruction *I)
CaptureComponents getCapturesBefore(const Value *Object, const Instruction *I, bool OrAt) override
Return how Object may be captured before instruction I, considering only provenance captures.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:809
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Legacy wrapper pass to provide the GlobalsAAResult object.
bool isEquality() const
Return true if this predicate is either EQ or NE.
LLVM_ABI bool mayThrow(bool IncludePhaseOneUnwind=false) const LLVM_READONLY
Return true if this instruction may throw an exception.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isIdenticalToWhenDefined(const Instruction *I, bool IntersectAttrs=false) const LLVM_READONLY
This is like isIdenticalTo, except that it ignores the SubclassOptionalData flags,...
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
const_iterator begin() const
bool empty() const
empty - Return true when no intervals are mapped.
const_iterator end() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
bool hasValue() const
static LocationSize precise(uint64_t Value)
bool isScalable() const
TypeSize getValue() const
bool isPrecise() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
Value * getLength() const
Value * getValue() const
BasicBlock * getBlock() const
Definition MemorySSA.h:162
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
void setOptimized(MemoryAccess *MA)
Definition MemorySSA.h:392
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDependenceResults instance.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
MemoryLocation getWithNewPtr(const Value *NewPtr) const
const Value * Ptr
The address of the start of the location.
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition MemorySSA.h:1039
Legacy analysis pass which computes MemorySSA.
Definition MemorySSA.h:979
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
LLVM_ABI MemorySSAWalker * getSkipSelfWalker()
LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
LLVM_ABI MemorySSAWalker * getWalker()
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition MemorySSA.h:740
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition MemorySSA.h:260
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition MemorySSA.h:257
PHITransAddr - An address value which tracks and handles phi translation.
LLVM_ABI Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
LLVM_ABI bool isPotentiallyPHITranslatable() const
isPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
bool needsPHITranslationFromBlock(BasicBlock *BB) const
needsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Value * getAddr() const
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
LLVM_ABI bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
iterator begin() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:311
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
iterator_range< use_iterator > uses()
Definition Value.h:381
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:203
LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *Dest, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const DbgVariableRecord *DVRAssign, std::optional< DIExpression::FragmentInfo > &Result)
Calculate the fragment of the variable in DAI covered from (Dest + SliceOffsetInBits) to (Dest + S...
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &)
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
@ Uninitialized
Definition Threading.h:60
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
AllocFnKind
Definition Attributes.h:53
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1725
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
iterator_range< po_iterator< T > > post_order(const T &G)
LLVM_ABI bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
CycleInfo::CycleT Cycle
Definition CycleInfo.h:26
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI bool canReplacePointersIfEqual(const Value *From, const Value *To, const DataLayout &DL)
Returns true if a pointer value From can be replaced with another pointer value To if they are deeme...
Definition Loads.cpp:879
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
LLVM_ABI bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
LLVM_ABI Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI, unsigned AddrSpace)
Emit a call to the calloc function.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition Alignment.h:186
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
LLVM_ABI bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is unambiguously identified at the function-level.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI FunctionPass * createDeadStoreEliminationPass()
LLVM_ABI Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
auto predecessors(const MachineBasicBlock *BB)
bool capturesAnything(CaptureComponents CC)
Definition ModRef.h:379
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:375
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
bool isRefSet(const ModRefInfo MRI)
Definition ModRef.h:52
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Various options to control the behavior of getObjectSize.
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.