LLVM 23.0.0git
GenericUniformityImpl.h
Go to the documentation of this file.
1//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This template implementation resides in a separate file so that it
10// does not get injected into every .cpp file that includes the
11// generic header.
12//
13// DO NOT INCLUDE THIS FILE WHEN MERELY USING UNIFORMITYINFO.
14//
15// This file should only be included by files that implement a
16// specialization of the relvant templates. Currently these are:
17// - UniformityAnalysis.cpp
18//
19// Note: The DEBUG_TYPE macro should be defined before using this
20// file so that any use of LLVM_DEBUG is associated with the
21// including file rather than this file.
22//
23//===----------------------------------------------------------------------===//
24///
25/// \file
26/// \brief Implementation of uniformity analysis.
27///
28/// The algorithm is a fixed point iteration that starts with the assumption
29/// that all control flow and all values are uniform. Starting from sources of
30/// divergence (whose discovery must be implemented by a CFG- or even
31/// target-specific derived class), divergence of values is propagated from
32/// definition to uses in a straight-forward way. The main complexity lies in
33/// the propagation of the impact of divergent control flow on the divergence of
34/// values (sync dependencies).
35///
36/// NOTE: In general, no interface exists for a transform to update
37/// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a
38/// transitive dependence, but it also does not provide an interface for
39/// updating itself. Given that, transforms should not preserve uniformity in
40/// their getAnalysisUsage() callback.
41///
42//===----------------------------------------------------------------------===//
43
44#ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
45#define LLVM_ADT_GENERICUNIFORMITYIMPL_H
46
48
49#include "llvm/ADT/DenseSet.h"
50#include "llvm/ADT/STLExtras.h"
55
56#define DEBUG_TYPE "uniformity"
57
58namespace llvm {
59
60// Forward decl from llvm/CodeGen/MachineInstr.h
61class MachineInstr;
62
63/// Construct a specially modified post-order traversal of cycles.
64///
65/// The ModifiedPO is contructed using a virtually modified CFG as follows:
66///
67/// 1. The successors of pre-entry nodes (predecessors of an cycle
68/// entry that are outside the cycle) are replaced by the
69/// successors of the successors of the header.
70/// 2. Successors of the cycle header are replaced by the exit blocks
71/// of the cycle.
72///
73/// Effectively, we produce a depth-first numbering with the following
74/// properties:
75///
76/// 1. Nodes after a cycle are numbered earlier than the cycle header.
77/// 2. The header is numbered earlier than the nodes in the cycle.
78/// 3. The numbering of the nodes within the cycle forms an interval
79/// starting with the header.
80///
81/// Effectively, the virtual modification arranges the nodes in a
82/// cycle as a DAG with the header as the sole leaf, and successors of
83/// the header as the roots. A reverse traversal of this numbering has
84/// the following invariant on the unmodified original CFG:
85///
86/// Each node is visited after all its predecessors, except if that
87/// predecessor is the cycle header.
88///
89template <typename ContextT> class ModifiedPostOrder {
90public:
91 using BlockT = typename ContextT::BlockT;
92 using FunctionT = typename ContextT::FunctionT;
93 using DominatorTreeT = typename ContextT::DominatorTreeT;
94
96 using CycleT = typename CycleInfoT::CycleT;
97 using const_iterator = typename std::vector<BlockT *>::const_iterator;
98
99 ModifiedPostOrder(const ContextT &C) : Context(C) {}
100
101 bool empty() const { return Order.empty(); }
102 size_t size() const { return Order.size(); }
103
104 void clear() { Order.clear(); }
105 void compute(const CycleInfoT &CI);
106
107 unsigned count(BlockT *BB) const { return POIndex.count(BB); }
108 const BlockT *operator[](size_t Idx) const { return Order[Idx]; }
109
110 void appendBlock(const BlockT &BB, bool IsReducibleCycleHeader = false) {
111 POIndex[&BB] = Order.size();
112 Order.push_back(&BB);
113 LLVM_DEBUG(dbgs() << "ModifiedPO(" << POIndex[&BB]
114 << "): " << Context.print(&BB) << "\n");
115 if (IsReducibleCycleHeader)
116 ReducibleCycleHeaders.insert(&BB);
117 }
118
119 unsigned getIndex(const BlockT *BB) const {
120 assert(POIndex.count(BB));
121 return POIndex.lookup(BB);
122 }
123
124 bool isReducibleCycleHeader(const BlockT *BB) const {
125 return ReducibleCycleHeaders.contains(BB);
126 }
127
128private:
131 SmallPtrSet<const BlockT *, 32> ReducibleCycleHeaders;
132 const ContextT &Context;
133
134 void computeCyclePO(const CycleInfoT &CI, const CycleT *Cycle,
136
137 void computeStackPO(SmallVectorImpl<const BlockT *> &Stack,
138 const CycleInfoT &CI, const CycleT *Cycle,
140};
141
142template <typename> class DivergencePropagator;
143
144/// \class GenericSyncDependenceAnalysis
145///
146/// \brief Locate join blocks for disjoint paths starting at a divergent branch.
147///
148/// An analysis per divergent branch that returns the set of basic
149/// blocks whose phi nodes become divergent due to divergent control.
150/// These are the blocks that are reachable by two disjoint paths from
151/// the branch, or cycle exits reachable along a path that is disjoint
152/// from a path to the cycle latch.
153
154// --- Above line is not a doxygen comment; intentionally left blank ---
155//
156// Originally implemented in SyncDependenceAnalysis.cpp for DivergenceAnalysis.
157//
158// The SyncDependenceAnalysis is used in the UniformityAnalysis to model
159// control-induced divergence in phi nodes.
160//
161// -- Reference --
162// The algorithm is an extension of Section 5 of
163//
164// An abstract interpretation for SPMD divergence
165// on reducible control flow graphs.
166// Julian Rosemann, Simon Moll and Sebastian Hack
167// POPL '21
168//
169//
170// -- Sync dependence --
171// Sync dependence characterizes the control flow aspect of the
172// propagation of branch divergence. For example,
173//
174// %cond = icmp slt i32 %tid, 10
175// br i1 %cond, label %then, label %else
176// then:
177// br label %merge
178// else:
179// br label %merge
180// merge:
181// %a = phi i32 [ 0, %then ], [ 1, %else ]
182//
183// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
184// because %tid is not on its use-def chains, %a is sync dependent on %tid
185// because the branch "br i1 %cond" depends on %tid and affects which value %a
186// is assigned to.
187//
188//
189// -- Reduction to SSA construction --
190// There are two disjoint paths from A to X, if a certain variant of SSA
191// construction places a phi node in X under the following set-up scheme.
192//
193// This variant of SSA construction ignores incoming undef values.
194// That is paths from the entry without a definition do not result in
195// phi nodes.
196//
197// entry
198// / \
199// A \
200// / \ Y
201// B C /
202// \ / \ /
203// D E
204// \ /
205// F
206//
207// Assume that A contains a divergent branch. We are interested
208// in the set of all blocks where each block is reachable from A
209// via two disjoint paths. This would be the set {D, F} in this
210// case.
211// To generally reduce this query to SSA construction we introduce
212// a virtual variable x and assign to x different values in each
213// successor block of A.
214//
215// entry
216// / \
217// A \
218// / \ Y
219// x = 0 x = 1 /
220// \ / \ /
221// D E
222// \ /
223// F
224//
225// Our flavor of SSA construction for x will construct the following
226//
227// entry
228// / \
229// A \
230// / \ Y
231// x0 = 0 x1 = 1 /
232// \ / \ /
233// x2 = phi E
234// \ /
235// x3 = phi
236//
237// The blocks D and F contain phi nodes and are thus each reachable
238// by two disjoins paths from A.
239//
240// -- Remarks --
241// * In case of cycle exits we need to check for temporal divergence.
242// To this end, we check whether the definition of x differs between the
243// cycle exit and the cycle header (_after_ SSA construction).
244//
245// * In the presence of irreducible control flow, the fixed point is
246// reached only after multiple iterations. This is because labels
247// reaching the header of a cycle must be repropagated through the
248// cycle. This is true even in a reducible cycle, since the labels
249// may have been produced by a nested irreducible cycle.
250//
251// * Note that SyncDependenceAnalysis is not concerned with the points
252// of convergence in an irreducible cycle. It's only purpose is to
253// identify join blocks. The "diverged entry" criterion is
254// separately applied on join blocks to determine if an entire
255// irreducible cycle is assumed to be divergent.
256//
257// * Relevant related work:
258// A simple algorithm for global data flow analysis problems.
259// Matthew S. Hecht and Jeffrey D. Ullman.
260// SIAM Journal on Computing, 4(4):519–532, December 1975.
261//
262template <typename ContextT> class GenericSyncDependenceAnalysis {
263public:
264 using BlockT = typename ContextT::BlockT;
265 using DominatorTreeT = typename ContextT::DominatorTreeT;
266 using FunctionT = typename ContextT::FunctionT;
267 using ValueRefT = typename ContextT::ValueRefT;
268 using InstructionT = typename ContextT::InstructionT;
269
271 using CycleT = typename CycleInfoT::CycleT;
272
275
276 // * if BlockLabels[B] == C then C is the dominating definition at
277 // block B
278 // * if BlockLabels[B] == nullptr then we haven't seen B yet
279 // * if BlockLabels[B] == B then:
280 // - B is a join point of disjoint paths from X, or,
281 // - B is an immediate successor of X (initial value), or,
282 // - B is X
284
285 /// Information discovered by the sync dependence analysis for each
286 /// divergent branch.
288 // Join points of diverged paths.
290 // Divergent cycle exits
292 // Labels assigned to blocks on diverged paths.
294 };
295
297
298 GenericSyncDependenceAnalysis(const ContextT &Context,
299 const DominatorTreeT &DT, const CycleInfoT &CI);
300
301 /// \brief Computes divergent join points and cycle exits caused by branch
302 /// divergence in \p Term.
303 ///
304 /// This returns a pair of sets:
305 /// * The set of blocks which are reachable by disjoint paths from
306 /// \p Term.
307 /// * The set also contains cycle exits if there two disjoint paths:
308 /// one from \p Term to the cycle exit and another from \p Term to
309 /// the cycle header.
310 const DivergenceDescriptor &getJoinBlocks(const BlockT *DivTermBlock);
311
312private:
313 static inline DivergenceDescriptor EmptyDivergenceDesc;
314
315 ModifiedPO CyclePO;
316
317 const DominatorTreeT &DT;
318 const CycleInfoT &CI;
319
321 CachedControlDivDescs;
322};
323
324/// \brief Analysis that identifies uniform values in a data-parallel
325/// execution.
326///
327/// This analysis propagates divergence in a data-parallel context
328/// from sources of divergence to all users. It can be instantiated
329/// for an IR that provides a suitable SSAContext.
330template <typename ContextT> class GenericUniformityAnalysisImpl {
331public:
332 using BlockT = typename ContextT::BlockT;
333 using FunctionT = typename ContextT::FunctionT;
334 using ValueRefT = typename ContextT::ValueRefT;
335 using ConstValueRefT = typename ContextT::ConstValueRefT;
336 using UseT = typename ContextT::UseT;
337 using InstructionT = typename ContextT::InstructionT;
338 using DominatorTreeT = typename ContextT::DominatorTreeT;
339
341 using CycleT = typename CycleInfoT::CycleT;
342
345 typename SyncDependenceAnalysisT::DivergenceDescriptor;
346 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
347
349 std::tuple<ConstValueRefT, InstructionT *, const CycleT *>;
350
353 : Context(CI.getSSAContext()), F(*Context.getFunction()), CI(CI),
354 TTI(TTI), DT(DT), SDA(Context, DT, CI) {}
355
357
358 const FunctionT &getFunction() const { return F; }
359
360 /// \brief Mark \p UniVal as a value that is always uniform.
361 void addUniformOverride(const InstructionT &Instr);
362
363 /// \brief Examine \p I for divergent outputs and add to the worklist.
364 void markDivergent(const InstructionT &I);
365
366 /// \brief Mark \p DivVal as a divergent value by removing it from
367 /// UniformValues. \returns Whether the tracked divergence state of
368 /// \p DivVal changed.
369 bool markDivergent(ConstValueRefT DivVal);
370
371 /// \brief Mark outputs of \p Instr as divergent.
372 /// \returns Whether the tracked divergence state of any output has changed.
373 bool markDefsDivergent(const InstructionT &Instr);
374
375 /// \brief Propagate divergence to all instructions in the region.
376 /// Divergence is seeded by calls to \p markDivergent.
377 void compute();
378
379 /// \brief Whether \p Val will always return a uniform value regardless of its
380 /// operands
381 bool isAlwaysUniform(const InstructionT &Instr) const;
382
383 bool hasDivergentDefs(const InstructionT &I) const;
384
386 assert(I.isTerminator() && "Expected a terminator instruction!");
387 return DivergentTermBlocks.contains(I.getParent());
388 };
389
390 /// \brief Whether \p Val is divergent at its definition.
391 /// When the target has no branch divergence, compute() is never called
392 /// and everything is uniform. Otherwise, values not in UniformValues
393 /// (e.g. newly created) are conservatively treated as divergent.
396 return false;
397 // Only values that were present during analysis are tracked in
398 // UniformValues (Instructions/Arguments for IR, Registers for MIR).
399 // Other values (e.g. constants, globals) are always uniform but are
400 // not added to UniformValues; this check avoids false divergence.
401 if (ContextT::isAlwaysUniform(V))
402 return false;
403 return !UniformValues.contains(V);
404 }
405
406 bool isDivergentUse(const UseT &U) const;
407
408 bool hasDivergentTerminator(const BlockT &B) const {
409 return DivergentTermBlocks.contains(&B);
410 }
411
412 void print(raw_ostream &Out) const;
413
414 /// Print divergent arguments and return true if any were found.
415 /// IR specialization iterates F.args(); default is a no-op.
416 bool printDivergentArgs(raw_ostream &Out) const;
417
419
421 const CycleT *);
422
423 /// Check if an instruction with Custom uniformity can be proven uniform
424 /// based on its operands. This queries the target-specific callback.
425 bool isCustomUniform(const InstructionT &I) const;
426
427 /// \brief Add an instruction that requires custom uniformity analysis.
429
430protected:
431 const ContextT &Context;
432 const FunctionT &F;
434 const TargetTransformInfo *TTI = nullptr;
435
436 // Whether the target has branch divergence. Set at the start of compute(),
437 // which is only called when the target has branch divergence. When false,
438 // isDivergent() returns false for all values.
440
442
443 // Values known to be uniform. Populated in initialize() with all values,
444 // then values are removed as divergence is propagated. After analysis,
445 // values not in this set are conservatively treated as divergent.
447
448 // Internal worklist for divergence propagation.
449 std::vector<const InstructionT *> Worklist;
450
451 // Set of instructions that require custom uniformity analysis based on
452 // operand uniformity.
454
455 /// \brief Mark \p Term as divergent and push all Instructions that become
456 /// divergent as a result on the worklist.
457 void analyzeControlDivergence(const InstructionT &Term);
458
459private:
460 const DominatorTreeT &DT;
461
462 // Recognized cycles with divergent exits.
463 SmallPtrSet<const CycleT *, 16> DivergentExitCycles;
464
465 // Cycles assumed to be divergent.
466 //
467 // We don't use a set here because every insertion needs an explicit
468 // traversal of all existing members.
469 SmallVector<const CycleT *> AssumedDivergent;
470
471 // The SDA links divergent branches to divergent control-flow joins.
473
474 // Set of known-uniform values.
476
477 /// \brief Mark all nodes in \p JoinBlock as divergent and push them on
478 /// the worklist.
479 void taintAndPushAllDefs(const BlockT &JoinBlock);
480
481 /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
482 /// the worklist.
483 void taintAndPushPhiNodes(const BlockT &JoinBlock);
484
485 /// \brief Identify all Instructions that become divergent because \p DivExit
486 /// is a divergent cycle exit of \p DivCycle. Mark those instructions as
487 /// divergent and push them on the worklist.
488 void propagateCycleExitDivergence(const BlockT &DivExit,
489 const CycleT &DivCycle);
490
491 /// Mark as divergent all external uses of values defined in \p DefCycle.
492 void analyzeCycleExitDivergence(const CycleT &DefCycle);
493
494 /// \brief Mark as divergent all uses of \p I that are outside \p DefCycle.
495 void propagateTemporalDivergence(const InstructionT &I,
496 const CycleT &DefCycle);
497
498 /// \brief Push all users of \p Val (in the region) to the worklist.
499 void pushUsers(const InstructionT &I);
500 void pushUsers(ConstValueRefT V);
501
502 bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const;
503
504 /// \brief Whether \p Def is divergent when read in \p ObservingBlock.
505 bool isTemporalDivergent(const BlockT &ObservingBlock,
506 const InstructionT &Def) const;
507};
508
509template <typename ImplT>
513
514/// Compute divergence starting with a divergent branch.
515template <typename ContextT> class DivergencePropagator {
516public:
517 using BlockT = typename ContextT::BlockT;
518 using DominatorTreeT = typename ContextT::DominatorTreeT;
519 using FunctionT = typename ContextT::FunctionT;
520 using ValueRefT = typename ContextT::ValueRefT;
521
523 using CycleT = typename CycleInfoT::CycleT;
524
528 typename SyncDependenceAnalysisT::DivergenceDescriptor;
529 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
530
535 const ContextT &Context;
536
537 // Track blocks that receive a new label. Every time we relabel a
538 // cycle header, we another pass over the modified post-order in
539 // order to propagate the header label. The bit vector also allows
540 // us to skip labels that have not changed.
542
543 // divergent join and cycle exit descriptor.
544 std::unique_ptr<DivergenceDescriptorT> DivDesc;
546
552
554 Out << "Propagator::BlockLabels {\n";
555 for (int BlockIdx = (int)CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) {
556 const auto *Block = CyclePOT[BlockIdx];
557 const auto *Label = BlockLabels[Block];
558 Out << Context.print(Block) << "(" << BlockIdx << ") : ";
559 if (!Label) {
560 Out << "<null>\n";
561 } else {
562 Out << Context.print(Label) << "\n";
563 }
564 }
565 Out << "}\n";
566 }
567
568 // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
569 // causes a divergent join.
570 bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel) {
571 const auto *OldLabel = BlockLabels[&SuccBlock];
572
573 LLVM_DEBUG(dbgs() << "labeling " << Context.print(&SuccBlock) << ":\n"
574 << "\tpushed label: " << Context.print(&PushedLabel)
575 << "\n"
576 << "\told label: " << Context.print(OldLabel) << "\n");
577
578 // Early exit if there is no change in the label.
579 if (OldLabel == &PushedLabel)
580 return false;
581
582 if (OldLabel != &SuccBlock) {
583 auto SuccIdx = CyclePOT.getIndex(&SuccBlock);
584 // Assigning a new label, mark this in FreshLabels.
585 LLVM_DEBUG(dbgs() << "\tfresh label: " << SuccIdx << "\n");
586 FreshLabels.set(SuccIdx);
587 }
588
589 // This is not a join if the succ was previously unlabeled.
590 if (!OldLabel) {
591 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&PushedLabel)
592 << "\n");
593 BlockLabels[&SuccBlock] = &PushedLabel;
594 return false;
595 }
596
597 // This is a new join. Label the join block as itself, and not as
598 // the pushed label.
599 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&SuccBlock) << "\n");
600 BlockLabels[&SuccBlock] = &SuccBlock;
601
602 return true;
603 }
604
605 // visiting a virtual cycle exit edge from the cycle header --> temporal
606 // divergence on join
607 bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label) {
608 if (!computeJoin(ExitBlock, Label))
609 return false;
610
611 // Identified a divergent cycle exit
612 DivDesc->CycleDivBlocks.insert(&ExitBlock);
613 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(&ExitBlock)
614 << "\n");
615 return true;
616 }
617
618 // process \p SuccBlock with reaching definition \p Label
619 bool visitEdge(const BlockT &SuccBlock, const BlockT &Label) {
620 if (!computeJoin(SuccBlock, Label))
621 return false;
622
623 // Divergent, disjoint paths join.
624 DivDesc->JoinDivBlocks.insert(&SuccBlock);
625 LLVM_DEBUG(dbgs() << "\tDivergent join: " << Context.print(&SuccBlock)
626 << "\n");
627 return true;
628 }
629
630 std::unique_ptr<DivergenceDescriptorT> computeJoinPoints() {
632
633 LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: "
634 << Context.print(&DivTermBlock) << "\n");
635
636 int DivTermIdx = CyclePOT.getIndex(&DivTermBlock);
637 auto const *DivTermCycle = CI.getCycle(&DivTermBlock);
638
639 // Locate the largest ancestor cycle that is not reducible and does not
640 // contain a reducible ancestor. This is done with a lambda that is defined
641 // and invoked in the same statement.
642 const CycleT *IrreducibleAncestor = [](const CycleT *C) -> const CycleT * {
643 if (!C)
644 return nullptr;
645 if (C->isReducible())
646 return nullptr;
647 while (const CycleT *P = C->getParentCycle()) {
648 if (P->isReducible())
649 return C;
650 C = P;
651 }
652 assert(!C->getParentCycle());
653 assert(!C->isReducible());
654 return C;
655 }(DivTermCycle);
656
657 // Bootstrap with branch targets
658 for (const auto *SuccBlock : successors(&DivTermBlock)) {
659 if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) {
660 // If DivTerm exits the cycle immediately, computeJoin() might
661 // not reach SuccBlock with a different label. We need to
662 // check for this exit now.
663 DivDesc->CycleDivBlocks.insert(SuccBlock);
664 LLVM_DEBUG(dbgs() << "\tImmediate divergent cycle exit: "
665 << Context.print(SuccBlock) << "\n");
666 }
667 visitEdge(*SuccBlock, *SuccBlock);
668 }
669
670 // Technically propagation can continue until it reaches the last node.
671 //
672 // For efficiency, propagation can stop if FreshLabels.count()==1. But
673 // For irreducible cycles, let propagation continue until it reaches
674 // out of irreducible cycles (see code for details.)
675 while (true) {
676 auto BlockIdx = FreshLabels.find_last();
677 if (BlockIdx == -1)
678 break;
679
680 const auto *Block = CyclePOT[BlockIdx];
681 // If no irreducible cycle, stop if freshLable.count() = 1 and Block
682 // is the IPD. If it is in any irreducible cycle, continue propagation.
683 if (FreshLabels.count() == 1 &&
684 (!IrreducibleAncestor || !IrreducibleAncestor->contains(Block)))
685 break;
686
687 LLVM_DEBUG(dbgs() << "Current labels:\n"; printDefs(dbgs()));
688
689 FreshLabels.reset(BlockIdx);
690 if (BlockIdx == DivTermIdx) {
691 LLVM_DEBUG(dbgs() << "Skipping DivTermBlock\n");
692 continue;
693 }
694
695 LLVM_DEBUG(dbgs() << "visiting " << Context.print(Block) << " at index "
696 << BlockIdx << "\n");
697
698 const auto *Label = BlockLabels[Block];
699 assert(Label);
700
701 // If the current block is the header of a reducible cycle, then the label
702 // should be propagated to the cycle exits. If this cycle contains the
703 // branch, then those exits are divergent exits. This is true for any DFS.
704 //
705 // If some DFS has a reducible cycle C with header H, then for
706 // any other DFS, H is the header of a cycle C' that is a
707 // superset of C.
708 //
709 // - For a divergent branch inside the subgraph C, any join node inside
710 // C is either H, or some node encountered by paths within C, without
711 // passing through H.
712 //
713 // - For a divergent branch outside the subgraph C, H is the only node
714 // in C reachable from multiple paths since it is the only entry to C.
715 LLVM_DEBUG(dbgs() << "Check for reducible cycle: " << Context.print(Block)
716 << '\n');
717 if (CyclePOT.isReducibleCycleHeader(Block)) {
718 const auto *BlockCycle = CI.getCycle(Block);
719 LLVM_DEBUG(dbgs() << BlockCycle->print(Context) << '\n');
720 SmallVector<BlockT *, 4> BlockCycleExits;
721 BlockCycle->getExitBlocks(BlockCycleExits);
722 bool BranchIsInside = BlockCycle->contains(&DivTermBlock);
723 for (auto *BlockCycleExit : BlockCycleExits) {
724 if (BranchIsInside)
725 visitCycleExitEdge(*BlockCycleExit, *Label);
726 else
727 visitEdge(*BlockCycleExit, *Label);
728 }
729 } else {
730 for (const auto *SuccBlock : successors(Block))
731 visitEdge(*SuccBlock, *Label);
732 }
733 }
734
735 LLVM_DEBUG(dbgs() << "Final labeling:\n"; printDefs(dbgs()));
736
737 // Check every cycle containing DivTermBlock for exit divergence.
738 // A cycle has exit divergence if the label of an exit block does
739 // not match the label of its header.
740 for (const auto *Cycle = CI.getCycle(&DivTermBlock); Cycle;
741 Cycle = Cycle->getParentCycle()) {
742 if (Cycle->isReducible()) {
743 // The exit divergence of a reducible cycle is recorded while
744 // propagating labels.
745 continue;
746 }
748 Cycle->getExitBlocks(Exits);
749 auto *Header = Cycle->getHeader();
750 auto *HeaderLabel = BlockLabels[Header];
751 for (const auto *Exit : Exits) {
752 if (BlockLabels[Exit] != HeaderLabel) {
753 // Identified a divergent cycle exit
754 DivDesc->CycleDivBlocks.insert(Exit);
755 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(Exit)
756 << "\n");
757 }
758 }
759 }
760
761 return std::move(DivDesc);
762 }
763};
764
765template <typename ContextT>
767 const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
768 : CyclePO(Context), DT(DT), CI(CI) {
769 CyclePO.compute(CI);
770}
771
772template <typename ContextT>
774 const BlockT *DivTermBlock) -> const DivergenceDescriptor & {
775 // trivial case
776 if (succ_size(DivTermBlock) <= 1) {
777 return EmptyDivergenceDesc;
778 }
779
780 // already available in cache?
781 auto ItCached = CachedControlDivDescs.find(DivTermBlock);
782 if (ItCached != CachedControlDivDescs.end())
783 return *ItCached->second;
784
785 // compute all join points
786 DivergencePropagatorT Propagator(CyclePO, DT, CI, *DivTermBlock);
787 auto DivDesc = Propagator.computeJoinPoints();
788
789 auto PrintBlockSet = [&](ConstBlockSet &Blocks) {
790 return Printable([&](raw_ostream &Out) {
791 Out << "[";
792 ListSeparator LS;
793 for (const auto *BB : Blocks) {
794 Out << LS << CI.getSSAContext().print(BB);
795 }
796 Out << "]\n";
797 });
798 };
799
801 dbgs() << "\nResult (" << CI.getSSAContext().print(DivTermBlock)
802 << "):\n JoinDivBlocks: " << PrintBlockSet(DivDesc->JoinDivBlocks)
803 << " CycleDivBlocks: " << PrintBlockSet(DivDesc->CycleDivBlocks)
804 << "\n");
805 (void)PrintBlockSet;
806
807 auto ItInserted =
808 CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc));
809 assert(ItInserted.second);
810 return *ItInserted.first->second;
811}
812
813template <typename ContextT>
815 const InstructionT &I) {
816 if (isAlwaysUniform(I))
817 return;
818 // For custom uniformity candidates, check if the instruction can be
819 // proven uniform based on which operands are uniform/divergent.
820 // The candidate will be re-evaluated as operands become divergent.
821 if (CustomUniformityCandidates.contains(&I)) {
822 if (isCustomUniform(I))
823 return;
824 }
825 bool Marked = false;
826 if (I.isTerminator()) {
827 Marked = DivergentTermBlocks.insert(I.getParent()).second;
828 if (Marked) {
829 LLVM_DEBUG(dbgs() << "marked divergent term block: "
830 << Context.print(I.getParent()) << "\n");
831 }
832 } else {
833 Marked = markDefsDivergent(I);
834 }
835
836 if (Marked)
837 Worklist.push_back(&I);
838}
839
840template <typename ContextT>
842 ConstValueRefT Val) {
843 if (UniformValues.erase(Val)) {
844 LLVM_DEBUG(dbgs() << "marked divergent: " << Context.print(Val) << "\n");
845 return true;
846 }
847 return false;
848}
849
850template <typename ContextT>
852 const InstructionT &Instr) {
853 UniformOverrides.insert(&Instr);
854}
855
856template <typename ContextT>
861
862// Mark as divergent all external uses of values defined in \p DefCycle.
863//
864// A value V defined by a block B inside \p DefCycle may be used outside the
865// cycle only if the use is a PHI in some exit block, or B dominates some exit
866// block. Thus, we check uses as follows:
867//
868// - Check all PHIs in all exit blocks for inputs defined inside \p DefCycle.
869// - For every block B inside \p DefCycle that dominates at least one exit
870// block, check all uses outside \p DefCycle.
871//
872// FIXME: This function does not distinguish between divergent and uniform
873// exits. For each divergent exit, only the values that are live at that exit
874// need to be propagated as divergent at their use outside the cycle.
875template <typename ContextT>
876void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence(
877 const CycleT &DefCycle) {
879 DefCycle.getExitBlocks(Exits);
880 for (auto *Exit : Exits) {
881 for (auto &Phi : Exit->phis()) {
882 if (usesValueFromCycle(Phi, DefCycle)) {
883 markDivergent(Phi);
884 }
885 }
886 }
887
888 for (auto *BB : DefCycle.blocks()) {
889 if (!llvm::any_of(Exits,
890 [&](BlockT *Exit) { return DT.dominates(BB, Exit); }))
891 continue;
892 for (auto &II : *BB) {
893 propagateTemporalDivergence(II, DefCycle);
894 }
895 }
896}
897
898template <typename ContextT>
899void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
900 const BlockT &DivExit, const CycleT &InnerDivCycle) {
901 LLVM_DEBUG(dbgs() << "\tpropCycleExitDiv " << Context.print(&DivExit)
902 << "\n");
903 auto *DivCycle = &InnerDivCycle;
904 auto *OuterDivCycle = DivCycle;
905 auto *ExitLevelCycle = CI.getCycle(&DivExit);
906 const unsigned CycleExitDepth =
907 ExitLevelCycle ? ExitLevelCycle->getDepth() : 0;
908
909 // Find outer-most cycle that does not contain \p DivExit
910 while (DivCycle && DivCycle->getDepth() > CycleExitDepth) {
911 LLVM_DEBUG(dbgs() << " Found exiting cycle: "
912 << Context.print(DivCycle->getHeader()) << "\n");
913 OuterDivCycle = DivCycle;
914 DivCycle = DivCycle->getParentCycle();
915 }
916 LLVM_DEBUG(dbgs() << "\tOuter-most exiting cycle: "
917 << Context.print(OuterDivCycle->getHeader()) << "\n");
918
919 if (!DivergentExitCycles.insert(OuterDivCycle).second)
920 return;
921
922 // Exit divergence does not matter if the cycle itself is assumed to
923 // be divergent.
924 for (const auto *C : AssumedDivergent) {
925 if (C->contains(OuterDivCycle))
926 return;
927 }
928
929 analyzeCycleExitDivergence(*OuterDivCycle);
930}
931
932template <typename ContextT>
933void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
934 const BlockT &BB) {
935 LLVM_DEBUG(dbgs() << "taintAndPushAllDefs " << Context.print(&BB) << "\n");
936 for (const auto &I : instrs(BB)) {
937 // Terminators do not produce values; they are divergent only if
938 // the condition is divergent. That is handled when the divergent
939 // condition is placed in the worklist.
940 if (I.isTerminator())
941 break;
942
943 markDivergent(I);
944 }
945}
946
947/// Mark divergent phi nodes in a join block
948template <typename ContextT>
949void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
950 const BlockT &JoinBlock) {
951 LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
952 << "\n");
953 for (const auto &Phi : JoinBlock.phis()) {
954 // FIXME: The non-undef value is not constant per se; it just happens to be
955 // uniform and may not dominate this PHI. So assuming that the same value
956 // reaches along all incoming edges may itself be undefined behaviour. This
957 // particular interpretation of the undef value was added to
958 // DivergenceAnalysis in the following review:
959 //
960 // https://reviews.llvm.org/D19013
961 if (ContextT::isConstantOrUndefValuePhi(Phi))
962 continue;
963 markDivergent(Phi);
964 }
965}
966
967/// Add \p Candidate to \p Cycles if it is not already contained in \p Cycles.
968///
969/// \return true iff \p Candidate was added to \p Cycles.
970template <typename CycleT>
971bool insertIfNotContained(SmallVector<CycleT *> &Cycles, CycleT *Candidate) {
972 if (llvm::any_of(Cycles,
973 [Candidate](CycleT *C) { return C->contains(Candidate); }))
974 return false;
975 Cycles.push_back(Candidate);
976 return true;
977}
978
979/// Return the outermost cycle made divergent by branch outside it.
980///
981/// If two paths that diverged outside an irreducible cycle join
982/// inside that cycle, then that whole cycle is assumed to be
983/// divergent. This does not apply if the cycle is reducible.
984template <typename CycleT, typename BlockT>
985const CycleT *getExtDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
986 const BlockT *JoinBlock) {
987 assert(Cycle);
988 assert(Cycle->contains(JoinBlock));
989
990 if (Cycle->contains(DivTermBlock))
991 return nullptr;
992
993 const auto *OriginalCycle = Cycle;
994 const auto *Parent = Cycle->getParentCycle();
995 while (Parent && !Parent->contains(DivTermBlock)) {
996 Cycle = Parent;
997 Parent = Cycle->getParentCycle();
998 }
999
1000 // If the original cycle is not the outermost cycle, then the outermost cycle
1001 // is irreducible. If the outermost cycle were reducible, then external
1002 // diverged paths would not reach the original inner cycle.
1003 (void)OriginalCycle;
1004 assert(Cycle == OriginalCycle || !Cycle->isReducible());
1005
1006 if (Cycle->isReducible()) {
1007 assert(Cycle->getHeader() == JoinBlock);
1008 return nullptr;
1009 }
1010
1011 LLVM_DEBUG(dbgs() << "cycle made divergent by external branch\n");
1012 return Cycle;
1013}
1014
1015/// Return the outermost cycle made divergent by branch inside it.
1016///
1017/// This checks the "diverged entry" criterion defined in the
1018/// docs/ConvergenceAnalysis.html.
1019template <typename ContextT, typename CycleT, typename BlockT,
1020 typename DominatorTreeT>
1021const CycleT *getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
1022 const BlockT *JoinBlock, const DominatorTreeT &DT,
1023 ContextT &Context) {
1024 LLVM_DEBUG(dbgs() << "examine join " << Context.print(JoinBlock)
1025 << " for internal branch " << Context.print(DivTermBlock)
1026 << "\n");
1027 if (DT.properlyDominates(DivTermBlock, JoinBlock))
1028 return nullptr;
1029
1030 // Find the smallest common cycle, if one exists.
1031 assert(Cycle && Cycle->contains(JoinBlock));
1032 while (Cycle && !Cycle->contains(DivTermBlock)) {
1033 Cycle = Cycle->getParentCycle();
1034 }
1035 if (!Cycle || Cycle->isReducible())
1036 return nullptr;
1037
1038 if (DT.properlyDominates(Cycle->getHeader(), JoinBlock))
1039 return nullptr;
1040
1041 LLVM_DEBUG(dbgs() << " header " << Context.print(Cycle->getHeader())
1042 << " does not dominate join\n");
1043
1044 const auto *Parent = Cycle->getParentCycle();
1045 while (Parent && !DT.properlyDominates(Parent->getHeader(), JoinBlock)) {
1046 LLVM_DEBUG(dbgs() << " header " << Context.print(Parent->getHeader())
1047 << " does not dominate join\n");
1048 Cycle = Parent;
1049 Parent = Parent->getParentCycle();
1050 }
1051
1052 LLVM_DEBUG(dbgs() << " cycle made divergent by internal branch\n");
1053 return Cycle;
1054}
1055
1056template <typename ContextT, typename CycleT, typename BlockT,
1057 typename DominatorTreeT>
1058const CycleT *
1059getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
1060 const BlockT *JoinBlock, const DominatorTreeT &DT,
1061 ContextT &Context) {
1062 if (!Cycle)
1063 return nullptr;
1064
1065 // First try to expand Cycle to the largest that contains JoinBlock
1066 // but not DivTermBlock.
1067 const auto *Ext = getExtDivCycle(Cycle, DivTermBlock, JoinBlock);
1068
1069 // Continue expanding to the largest cycle that contains both.
1070 const auto *Int = getIntDivCycle(Cycle, DivTermBlock, JoinBlock, DT, Context);
1071
1072 if (Int)
1073 return Int;
1074 return Ext;
1075}
1076
1077template <typename ContextT>
1078bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
1079 const BlockT &ObservingBlock, const InstructionT &Def) const {
1080 const BlockT *DefBlock = Def.getParent();
1081 for (const CycleT *Cycle = CI.getCycle(DefBlock);
1082 Cycle && !Cycle->contains(&ObservingBlock);
1083 Cycle = Cycle->getParentCycle()) {
1084 if (DivergentExitCycles.contains(Cycle)) {
1085 return true;
1086 }
1087 }
1088 return false;
1089}
1090
1091template <typename ContextT>
1093 const InstructionT &Term) {
1094 const auto *DivTermBlock = Term.getParent();
1095 DivergentTermBlocks.insert(DivTermBlock);
1096 LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Context.print(DivTermBlock)
1097 << "\n");
1098
1099 // Don't propagate divergence from unreachable blocks.
1100 if (!DT.isReachableFromEntry(DivTermBlock))
1101 return;
1102
1103 const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock);
1105
1106 // Iterate over all blocks now reachable by a disjoint path join
1107 for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
1108 const auto *Cycle = CI.getCycle(JoinBlock);
1109 LLVM_DEBUG(dbgs() << "visiting join block " << Context.print(JoinBlock)
1110 << "\n");
1111 if (const auto *Outermost = getOutermostDivergentCycle(
1112 Cycle, DivTermBlock, JoinBlock, DT, Context)) {
1113 LLVM_DEBUG(dbgs() << "found divergent cycle\n");
1114 DivCycles.push_back(Outermost);
1115 continue;
1116 }
1117 taintAndPushPhiNodes(*JoinBlock);
1118 }
1119
1120 // Sort by order of decreasing depth. This allows later cycles to be skipped
1121 // because they are already contained in earlier ones.
1122 llvm::sort(DivCycles, [](const CycleT *A, const CycleT *B) {
1123 return A->getDepth() > B->getDepth();
1124 });
1125
1126 // Cycles that are assumed divergent due to the diverged entry
1127 // criterion potentially contain temporal divergence depending on
1128 // the DFS chosen. Conservatively, all values produced in such a
1129 // cycle are assumed divergent. "Cycle invariant" values may be
1130 // assumed uniform, but that requires further analysis.
1131 for (auto *C : DivCycles) {
1132 if (!insertIfNotContained(AssumedDivergent, C))
1133 continue;
1134 LLVM_DEBUG(dbgs() << "process divergent cycle\n");
1135 for (const BlockT *BB : C->blocks()) {
1136 taintAndPushAllDefs(*BB);
1137 }
1138 }
1139
1140 const auto *BranchCycle = CI.getCycle(DivTermBlock);
1141 assert(DivDesc.CycleDivBlocks.empty() || BranchCycle);
1142 for (const auto *DivExitBlock : DivDesc.CycleDivBlocks) {
1143 propagateCycleExitDivergence(*DivExitBlock, *BranchCycle);
1144 }
1145}
1146
1147template <typename ContextT>
1149 HasBranchDivergence = true;
1150
1151 // All values on the Worklist are divergent.
1152 // Their users may not have been updated yet.
1153 while (!Worklist.empty()) {
1154 const InstructionT *I = Worklist.back();
1155 Worklist.pop_back();
1156
1157 LLVM_DEBUG(dbgs() << "worklist pop: " << Context.print(I) << "\n");
1158
1159 if (I->isTerminator()) {
1161 continue;
1162 }
1163
1164 // propagate value divergence to users
1165 assert(hasDivergentDefs(*I) && "Worklist invariant violated!");
1166 pushUsers(*I);
1167 }
1168}
1169
1170template <typename ContextT>
1176
1177template <typename ContextT>
1179 const InstructionT &Instr) const {
1180 return UniformOverrides.contains(&Instr);
1181}
1182
1183template <typename ContextT>
1188
1189template <typename ContextT>
1191 const DominatorTreeT &DT, const CycleInfoT &CI,
1192 const TargetTransformInfo *TTI) {
1193 DA.reset(new ImplT{DT, CI, TTI});
1194}
1195
1196template <typename ContextT>
1198 // When we print Value, LLVM IR instruction, we want to print extra new line.
1199 // In LLVM IR print function for Value does not print new line at the end.
1200 // In MIR print for MachineInstr prints new line at the end.
1201 constexpr bool IsMIR = std::is_same<InstructionT, MachineInstr>::value;
1202 std::string NewLine = IsMIR ? "" : "\n";
1203
1204 bool FoundDivergence = false;
1205
1206 FoundDivergence |= printDivergentArgs(OS);
1207
1208 if (!AssumedDivergent.empty()) {
1209 FoundDivergence = true;
1210 OS << "CYCLES ASSUMED DIVERGENT:\n";
1211 for (const CycleT *Cycle : AssumedDivergent) {
1212 OS << " " << Cycle->print(Context) << '\n';
1213 }
1214 }
1215
1216 if (!DivergentExitCycles.empty()) {
1217 FoundDivergence = true;
1218 OS << "CYCLES WITH DIVERGENT EXIT:\n";
1219 for (const CycleT *Cycle : DivergentExitCycles) {
1220 OS << " " << Cycle->print(Context) << '\n';
1221 }
1222 }
1223
1224 if (!TemporalDivergenceList.empty()) {
1225 FoundDivergence = true;
1226 OS << "\nTEMPORAL DIVERGENCE LIST:\n";
1227
1228 for (auto [Val, UseInst, Cycle] : TemporalDivergenceList) {
1229 OS << "Value :" << Context.print(Val) << NewLine
1230 << "Used by :" << Context.print(UseInst) << NewLine
1231 << "Outside cycle :" << Cycle->print(Context) << "\n\n";
1232 }
1233 }
1234
1235 for (auto &Block : F) {
1236 OS << "\nBLOCK " << Context.print(&Block) << '\n';
1237
1238 OS << "DEFINITIONS\n";
1240 Context.appendBlockDefs(Defs, Block);
1241 for (auto Value : Defs) {
1242 if (isDivergent(Value)) {
1243 FoundDivergence = true;
1244 OS << " DIVERGENT: ";
1245 } else {
1246 OS << " ";
1247 }
1248 OS << Context.print(Value) << NewLine;
1249 }
1250
1251 OS << "TERMINATORS\n";
1253 Context.appendBlockTerms(Terms, Block);
1254 bool DivergentTerminators = hasDivergentTerminator(Block);
1255 if (DivergentTerminators)
1256 FoundDivergence = true;
1257 for (auto *T : Terms) {
1258 if (DivergentTerminators)
1259 OS << " DIVERGENT: ";
1260 else
1261 OS << " ";
1262 OS << Context.print(T) << NewLine;
1263 }
1264
1265 OS << "END BLOCK\n";
1266 }
1267
1268 if (!FoundDivergence)
1269 OS << "ALL VALUES UNIFORM\n";
1270}
1271
1272template <typename ContextT>
1276 return make_range(DA->TemporalDivergenceList.begin(),
1277 DA->TemporalDivergenceList.end());
1278}
1279
1280template <typename ContextT>
1281const typename ContextT::FunctionT &
1283 return DA->getFunction();
1284}
1285
1286/// Whether \p V is divergent at its definition.
1287/// A default-constructed instance (no analysis computed) reports everything
1288/// as uniform, which is conservatively correct for non-divergent targets.
1289template <typename ContextT>
1291 return DA && DA->isDivergent(V);
1292}
1293
1294template <typename ContextT>
1296 const InstructionT *I) const {
1297 assert(I->isTerminator() && "Expected a terminator instruction!");
1298 return DA && DA->isDivergentTerminator(*I);
1299}
1300
1301template <typename ContextT>
1303 return DA && DA->isDivergentUse(U);
1304}
1305
1306template <typename ContextT>
1308 return DA && DA->hasDivergentTerminator(B);
1309}
1310
1311/// \brief T helper function for printing.
1312template <typename ContextT>
1314 if (!DA) {
1315 Out << " Uniformity analysis not computed (no branch divergence).\n";
1316 return;
1317 }
1318 DA->print(Out);
1319}
1320
1321template <typename ContextT>
1322void llvm::ModifiedPostOrder<ContextT>::computeStackPO(
1323 SmallVectorImpl<const BlockT *> &Stack, const CycleInfoT &CI,
1324 const CycleT *Cycle, SmallPtrSetImpl<const BlockT *> &Finalized) {
1325 LLVM_DEBUG(dbgs() << "inside computeStackPO\n");
1326 while (!Stack.empty()) {
1327 auto *NextBB = Stack.back();
1328 if (Finalized.count(NextBB)) {
1329 Stack.pop_back();
1330 continue;
1331 }
1332 LLVM_DEBUG(dbgs() << " visiting " << CI.getSSAContext().print(NextBB)
1333 << "\n");
1334 auto *NestedCycle = CI.getCycle(NextBB);
1335 if (Cycle != NestedCycle && (!Cycle || Cycle->contains(NestedCycle))) {
1336 LLVM_DEBUG(dbgs() << " found a cycle\n");
1337 while (NestedCycle->getParentCycle() != Cycle)
1338 NestedCycle = NestedCycle->getParentCycle();
1339
1340 SmallVector<BlockT *, 3> NestedExits;
1341 NestedCycle->getExitBlocks(NestedExits);
1342 bool PushedNodes = false;
1343 for (auto *NestedExitBB : NestedExits) {
1344 LLVM_DEBUG(dbgs() << " examine exit: "
1345 << CI.getSSAContext().print(NestedExitBB) << "\n");
1346 if (Cycle && !Cycle->contains(NestedExitBB))
1347 continue;
1348 if (Finalized.count(NestedExitBB))
1349 continue;
1350 PushedNodes = true;
1351 Stack.push_back(NestedExitBB);
1352 LLVM_DEBUG(dbgs() << " pushed exit: "
1353 << CI.getSSAContext().print(NestedExitBB) << "\n");
1354 }
1355 if (!PushedNodes) {
1356 // All loop exits finalized -> finish this node
1357 Stack.pop_back();
1358 computeCyclePO(CI, NestedCycle, Finalized);
1359 }
1360 continue;
1361 }
1362
1363 LLVM_DEBUG(dbgs() << " no nested cycle, going into DAG\n");
1364 // DAG-style
1365 bool PushedNodes = false;
1366 for (auto *SuccBB : successors(NextBB)) {
1367 LLVM_DEBUG(dbgs() << " examine succ: "
1368 << CI.getSSAContext().print(SuccBB) << "\n");
1369 if (Cycle && !Cycle->contains(SuccBB))
1370 continue;
1371 if (Finalized.count(SuccBB))
1372 continue;
1373 PushedNodes = true;
1374 Stack.push_back(SuccBB);
1375 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(SuccBB)
1376 << "\n");
1377 }
1378 if (!PushedNodes) {
1379 // Never push nodes twice
1380 LLVM_DEBUG(dbgs() << " finishing node: "
1381 << CI.getSSAContext().print(NextBB) << "\n");
1382 Stack.pop_back();
1383 Finalized.insert(NextBB);
1384 appendBlock(*NextBB);
1385 }
1386 }
1387 LLVM_DEBUG(dbgs() << "exited computeStackPO\n");
1388}
1389
1390template <typename ContextT>
1391void ModifiedPostOrder<ContextT>::computeCyclePO(
1392 const CycleInfoT &CI, const CycleT *Cycle,
1394 LLVM_DEBUG(dbgs() << "inside computeCyclePO\n");
1396 auto *CycleHeader = Cycle->getHeader();
1397
1398 LLVM_DEBUG(dbgs() << " noted header: "
1399 << CI.getSSAContext().print(CycleHeader) << "\n");
1400 assert(!Finalized.count(CycleHeader));
1401 Finalized.insert(CycleHeader);
1402
1403 // Visit the header last
1404 LLVM_DEBUG(dbgs() << " finishing header: "
1405 << CI.getSSAContext().print(CycleHeader) << "\n");
1406 appendBlock(*CycleHeader, Cycle->isReducible());
1407
1408 // Initialize with immediate successors
1409 for (auto *BB : successors(CycleHeader)) {
1410 LLVM_DEBUG(dbgs() << " examine succ: " << CI.getSSAContext().print(BB)
1411 << "\n");
1412 if (!Cycle->contains(BB))
1413 continue;
1414 if (BB == CycleHeader)
1415 continue;
1416 if (!Finalized.count(BB)) {
1417 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(BB)
1418 << "\n");
1419 Stack.push_back(BB);
1420 }
1421 }
1422
1423 // Compute PO inside region
1424 computeStackPO(Stack, CI, Cycle, Finalized);
1425
1426 LLVM_DEBUG(dbgs() << "exited computeCyclePO\n");
1427}
1428
1429/// \brief Generically compute the modified post order.
1430template <typename ContextT>
1434 auto *F = CI.getFunction();
1435 Stack.reserve(24); // FIXME made-up number
1436 Stack.push_back(&F->front());
1437 computeStackPO(Stack, CI, nullptr, Finalized);
1438}
1439
1440} // namespace llvm
1441
1442#undef DEBUG_TYPE
1443
1444#endif // LLVM_ADT_GENERICUNIFORMITYIMPL_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseSet and SmallDenseSet classes.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
uint64_t IntrinsicInst * II
#define P(N)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SparseBitVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
Compute divergence starting with a divergent branch.
typename SyncDependenceAnalysisT::BlockLabelMap BlockLabelMapT
GenericSyncDependenceAnalysis< ContextT > SyncDependenceAnalysisT
typename ContextT::DominatorTreeT DominatorTreeT
bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel)
std::unique_ptr< DivergenceDescriptorT > DivDesc
void printDefs(raw_ostream &Out)
typename ContextT::FunctionT FunctionT
GenericCycleInfo< ContextT > CycleInfoT
ModifiedPostOrder< ContextT > ModifiedPO
std::unique_ptr< DivergenceDescriptorT > computeJoinPoints()
bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label)
typename ContextT::ValueRefT ValueRefT
typename ContextT::BlockT BlockT
typename CycleInfoT::CycleT CycleT
DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT, const CycleInfoT &CI, const BlockT &DivTermBlock)
bool visitEdge(const BlockT &SuccBlock, const BlockT &Label)
typename SyncDependenceAnalysisT::DivergenceDescriptor DivergenceDescriptorT
Cycle information for a function.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Locate join blocks for disjoint paths starting at a divergent branch.
GenericSyncDependenceAnalysis(const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
ModifiedPostOrder< ContextT > ModifiedPO
DivergencePropagator< ContextT > DivergencePropagatorT
DenseMap< const BlockT *, const BlockT * > BlockLabelMap
SmallPtrSet< const BlockT *, 4 > ConstBlockSet
typename ContextT::DominatorTreeT DominatorTreeT
GenericCycleInfo< ContextT > CycleInfoT
typename ContextT::FunctionT FunctionT
typename ContextT::InstructionT InstructionT
typename ContextT::ValueRefT ValueRefT
const DivergenceDescriptor & getJoinBlocks(const BlockT *DivTermBlock)
Computes divergent join points and cycle exits caused by branch divergence in Term.
SmallVector< TemporalDivergenceTuple, 8 > TemporalDivergenceList
bool isAlwaysUniform(const InstructionT &Instr) const
Whether Val will always return a uniform value regardless of its operands.
bool isCustomUniform(const InstructionT &I) const
Check if an instruction with Custom uniformity can be proven uniform based on its operands.
typename ContextT::ValueRefT ValueRefT
void analyzeControlDivergence(const InstructionT &Term)
Mark Term as divergent and push all Instructions that become divergent as a result on the worklist.
void addCustomUniformityCandidate(const InstructionT *I)
Add an instruction that requires custom uniformity analysis.
bool isDivergent(ConstValueRefT V) const
Whether Val is divergent at its definition.
bool isDivergentUse(const UseT &U) const
bool hasDivergentDefs(const InstructionT &I) const
SmallPtrSet< const InstructionT *, 8 > CustomUniformityCandidates
typename ContextT::InstructionT InstructionT
bool printDivergentArgs(raw_ostream &Out) const
Print divergent arguments and return true if any were found.
typename ContextT::ConstValueRefT ConstValueRefT
typename SyncDependenceAnalysisT::BlockLabelMap BlockLabelMapT
typename ContextT::DominatorTreeT DominatorTreeT
void compute()
Propagate divergence to all instructions in the region.
bool hasDivergentTerminator(const BlockT &B) const
GenericUniformityAnalysisImpl(const DominatorTreeT &DT, const CycleInfoT &CI, const TargetTransformInfo *TTI)
bool markDefsDivergent(const InstructionT &Instr)
Mark outputs of Instr as divergent.
typename ContextT::FunctionT FunctionT
std::tuple< ConstValueRefT, InstructionT *, const CycleT * > TemporalDivergenceTuple
std::vector< const InstructionT * > Worklist
void markDivergent(const InstructionT &I)
Examine I for divergent outputs and add to the worklist.
SmallPtrSet< const BlockT *, 32 > DivergentTermBlocks
bool isDivergentTerminator(const InstructionT &I) const
GenericCycleInfo< ContextT > CycleInfoT
void addUniformOverride(const InstructionT &Instr)
Mark UniVal as a value that is always uniform.
void recordTemporalDivergence(ConstValueRefT, const InstructionT *, const CycleT *)
typename SyncDependenceAnalysisT::DivergenceDescriptor DivergenceDescriptorT
GenericSyncDependenceAnalysis< ContextT > SyncDependenceAnalysisT
bool hasDivergentTerminator(const BlockT &B)
void print(raw_ostream &Out) const
T helper function for printing.
std::tuple< ConstValueRefT, InstructionT *, const CycleT * > TemporalDivergenceTuple
bool isDivergentAtDef(ConstValueRefT V) const
Whether V is divergent at its definition.
typename ContextT::ConstValueRefT ConstValueRefT
typename ContextT::BlockT BlockT
bool isDivergentAtUse(const UseT &U) const
Whether U is divergent at its use.
typename ContextT::InstructionT InstructionT
const FunctionT & getFunction() const
The GPU kernel this analysis result is for.
bool isDivergentTerminator(const InstructionT *I) const
GenericCycleInfo< ContextT > CycleInfoT
typename ContextT::DominatorTreeT DominatorTreeT
iterator_range< TemporalDivergenceTuple * > getTemporalDivergenceList() const
A helper class to return the specified delimiter string after the first invocation of operator String...
Construct a specially modified post-order traversal of cycles.
typename ContextT::FunctionT FunctionT
typename CycleInfoT::CycleT CycleT
bool isReducibleCycleHeader(const BlockT *BB) const
void appendBlock(const BlockT &BB, bool IsReducibleCycleHeader=false)
const BlockT * operator[](size_t Idx) const
ModifiedPostOrder(const ContextT &C)
unsigned count(BlockT *BB) const
void compute(const CycleInfoT &CI)
Generically compute the modified post order.
GenericCycleInfo< ContextT > CycleInfoT
unsigned getIndex(const BlockT *BB) const
typename std::vector< BlockT * >::const_iterator const_iterator
typename ContextT::DominatorTreeT DominatorTreeT
typename ContextT::BlockT BlockT
Simple wrapper around std::function<void(raw_ostream&)>.
Definition Printable.h:38
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM Value Representation.
Definition Value.h:75
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
CycleInfo::CycleT Cycle
Definition CycleInfo.h:26
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
const CycleT * getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
const CycleT * getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Return the outermost cycle made divergent by branch inside it.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
TargetTransformInfo TTI
bool insertIfNotContained(SmallVector< CycleT * > &Cycles, CycleT *Candidate)
Add Candidate to Cycles if it is not already contained in Cycles.
const CycleT * getExtDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock)
Return the outermost cycle made divergent by branch outside it.
auto instrs(const MachineBasicBlock &BB)
Information discovered by the sync dependence analysis for each divergent branch.