LLVM 23.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a printer that converts from our internal representation
10// of machine-dependent LLVM code to NVPTX assembly language.
11//
12//===----------------------------------------------------------------------===//
13
14#include "NVPTXAsmPrinter.h"
19#include "NVPTX.h"
20#include "NVPTXDwarfDebug.h"
21#include "NVPTXMCExpr.h"
23#include "NVPTXRegisterInfo.h"
24#include "NVPTXSubtarget.h"
25#include "NVPTXTargetMachine.h"
26#include "NVPTXUtilities.h"
27#include "NVVMProperties.h"
29#include "cl_common_defines.h"
30#include "llvm/ADT/APFloat.h"
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/ArrayRef.h"
33#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/DenseSet.h"
35#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Sequence.h"
40#include "llvm/ADT/StringRef.h"
41#include "llvm/ADT/Twine.h"
56#include "llvm/IR/Argument.h"
57#include "llvm/IR/Attributes.h"
58#include "llvm/IR/BasicBlock.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugInfo.h"
64#include "llvm/IR/DebugLoc.h"
66#include "llvm/IR/Function.h"
67#include "llvm/IR/GlobalAlias.h"
68#include "llvm/IR/GlobalValue.h"
70#include "llvm/IR/Instruction.h"
71#include "llvm/IR/LLVMContext.h"
72#include "llvm/IR/Module.h"
73#include "llvm/IR/Operator.h"
74#include "llvm/IR/Type.h"
75#include "llvm/IR/User.h"
76#include "llvm/MC/MCExpr.h"
77#include "llvm/MC/MCInst.h"
78#include "llvm/MC/MCInstrDesc.h"
79#include "llvm/MC/MCStreamer.h"
80#include "llvm/MC/MCSymbol.h"
85#include "llvm/Support/Endian.h"
92#include <cassert>
93#include <cstdint>
94#include <cstring>
95#include <string>
96
97using namespace llvm;
98
99#define DEPOTNAME "__local_depot"
100
// Tail of the texture-name helper: asserts that V is named and returns that
// name.
// NOTE(review): the signature line (listing line 101) is absent from this
// listing.
102 assert(V.hasName() && "Found texture variable with no name");
103 return V.getName();
104}
105
// Tail of the surface-name helper: asserts that V is named and returns that
// name.
// NOTE(review): the signature line (listing line 106) is absent from this
// listing.
107 assert(V.hasName() && "Found surface variable with no name");
108 return V.getName();
109}
110
// Tail of the sampler-name helper: asserts that V is named and returns that
// name.
// NOTE(review): the signature line (listing line 111) is absent from this
// listing.
112 assert(V.hasName() && "Found sampler variable with no name");
113 return V.getName();
114}
115
116/// Emits initial debug location directive.
///
/// Returns without emitting anything when \p DD is null, when the function has
/// no DISubprogram, or when its compile unit is DebugDirectivesOnly or NoDebug.
// NOTE(review): the first line of the signature (listing line 117) is absent
// from this listing.
118 DwarfDebug *DD,
119 MCStreamer &OutStreamer) {
120 if (!DD)
121 return;
122
123 assert(OutStreamer.hasRawTextSupport() && "Expected assembly output mode.");
124 // This is NVPTX specific and it's unclear why.
125 // PR51079: If we have code without debug information we need to give up.
126 const DISubprogram *SP = MF.getFunction().getSubprogram();
127 if (!SP)
128 return;
129 assert(SP->getUnit());
130 // NoDebug and DebugDirectivesOnly do not require emitting the initial loc
131 // directive. NoDebug does not require any debug directives and the initial
132 // loc directive is not needed for DebugDirectivesOnly as it is redundant
133 // assuming this is a non-empty function.
134 if (SP->getUnit()->isDebugDirectivesOnly() || SP->getUnit()->isNoDebug())
135 return;
136
 // The value returned by emitInitialLocDirective is deliberately discarded.
137 (void)DD->emitInitialLocDirective(MF, /*CUID=*/0);
138}
139
140/// discoverDependentGlobals - Return a set of GlobalVariables on which \p V
141/// depends.
///
/// The result is accumulated into \p Globals: a GlobalVariable is inserted
/// directly and ends the walk on that path; any other User is searched
/// recursively through its operands.
// NOTE(review): the parameter list (listing lines 143-144) is absent from this
// listing.
142static void
145 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
146 Globals.insert(GV);
147 return;
148 }
149
150 if (const User *U = dyn_cast<User>(V))
151 for (const auto &O : U->operands())
152 discoverDependentGlobals(O, Globals);
153}
154
155/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
156/// instances to be emitted, but only after any dependents have been added
157/// first.
///
/// \p Visited holds globals already appended to \p Order; \p Visiting holds
/// globals currently on the recursion stack, so re-entering one of them is
/// reported as a circular dependency.
// NOTE(review): the parameter list (listing lines 159-162) and the declaration
// of `Others` (listing line 172) are absent from this listing.
158static void
163 // Have we already visited this one?
164 if (Visited.count(GV))
165 return;
166
167 // Do we have a circular dependency?
168 if (!Visiting.insert(GV).second)
169 report_fatal_error("Circular dependency found in global variable set");
170
171 // Make sure we visit all dependents first
173 for (const auto &O : GV->operands())
174 discoverDependentGlobals(O, Others);
175
176 for (const GlobalVariable *GV : Others)
177 VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
178
179 // Now we can visit ourself
180 Order.push_back(GV);
181 Visited.insert(GV);
182 Visiting.erase(GV);
183}
184
185void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
186 NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(),
187 getSubtargetInfo().getFeatureBits());
188
189 MCInst Inst;
190 lowerToMCInst(MI, Inst);
192}
193
194void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
195 OutMI.setOpcode(MI->getOpcode());
196 for (const auto MO : MI->operands())
197 OutMI.addOperand(lowerOperand(MO));
198}
199
// Converts a MachineOperand into the corresponding MCOperand, dispatching on
// MO.getType(). Register operands go through encodeVirtualRegister, immediates
// are passed through, and symbol-like operands are wrapped by GetSymbolRef.
// NOTE(review): the `case` labels of the outer switch and the bodies of the
// floating-point cases (listing lines 204, 206, 208-211, 213, 215, 224-225,
// 227-228, 230-231, 233-234) are absent from this listing, so the mapping from
// operand kind to return statement cannot be confirmed here.
200MCOperand NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
201 switch (MO.getType()) {
202 default:
203 llvm_unreachable("unknown operand type");
205 return MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
207 return MCOperand::createImm(MO.getImm());
212 return GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
214 return GetSymbolRef(getSymbol(MO.getGlobal()));
216 const ConstantFP *Cnt = MO.getFPImm();
217 const APFloat &Val = Cnt->getValueAPF();
218
 // Dispatch on the IR type of the floating-point immediate.
219 switch (Cnt->getType()->getTypeID()) {
220 default:
221 report_fatal_error("Unsupported FP type");
222 break;
223 case Type::HalfTyID:
226 case Type::BFloatTyID:
229 case Type::FloatTyID:
232 case Type::DoubleTyID:
235 }
236 break;
237 }
238 }
239}
240
241unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
243 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
244
245 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
246 unsigned RegNum = RegMap[Reg];
247
248 // Encode the register class in the upper 4 bits
249 // Must be kept in sync with NVPTXInstPrinter::printRegName
250 unsigned Ret = 0;
251 if (RC == &NVPTX::B1RegClass) {
252 Ret = (1 << 28);
253 } else if (RC == &NVPTX::B16RegClass) {
254 Ret = (2 << 28);
255 } else if (RC == &NVPTX::B32RegClass) {
256 Ret = (3 << 28);
257 } else if (RC == &NVPTX::B64RegClass) {
258 Ret = (4 << 28);
259 } else if (RC == &NVPTX::B128RegClass) {
260 Ret = (7 << 28);
261 } else {
262 report_fatal_error("Bad register class");
263 }
264
265 // Insert the vreg number
266 Ret |= (RegNum & 0x0FFFFFFF);
267 return Ret;
268 } else {
269 // Some special-use registers are actually physical registers.
270 // Encode this as the register class ID of 0 and the real register ID.
271 return Reg & 0x0FFFFFFF;
272 }
273}
274
275MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
276 const MCExpr *Expr;
277 Expr = MCSymbolRefExpr::create(Symbol, OutContext);
278 return MCOperand::createExpr(Expr);
279}
280
281void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
282 const DataLayout &DL = getDataLayout();
283 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
284 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
285
286 Type *Ty = F->getReturnType();
287 // A void or zero-sized return type (e.g. an empty struct) produces no return
288 // parameter.
289 if (Ty->isVoidTy() || Ty->isEmptyTy())
290 return;
291 O << " (";
292
293 auto PrintScalarRetVal = [&](unsigned Size) {
294 O << ".param .b" << promoteScalarArgumentSize(Size) << " func_retval0";
295 };
296 if (shouldPassAsArray(Ty)) {
297 const unsigned TotalSize = DL.getTypeAllocSize(Ty);
298 const Align RetAlignment =
299 getPTXParamAlign(F, Ty, AttributeList::ReturnIndex, DL);
300 O << ".param .align " << RetAlignment.value() << " .b8 func_retval0["
301 << TotalSize << "]";
302 } else if (Ty->isFloatingPointTy()) {
303 PrintScalarRetVal(Ty->getPrimitiveSizeInBits());
304 } else if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
305 PrintScalarRetVal(ITy->getBitWidth());
306 } else if (isa<PointerType>(Ty)) {
307 PrintScalarRetVal(TLI->getPointerTy(DL).getSizeInBits());
308 } else
309 llvm_unreachable("Unknown return type");
310 O << ") ";
311}
312
313void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
314 raw_ostream &O) {
315 const Function &F = MF.getFunction();
316 printReturnValStr(&F, O);
317}
318
319void NVPTXAsmPrinter::emitCallPrototype(const CallBase &CB,
320 unsigned UniqueCallSite,
321 raw_ostream &O) const {
322 const DataLayout &DL = getDataLayout();
323 const NVPTXSubtarget &STI = MF->getSubtarget<NVPTXSubtarget>();
324 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
325 const auto PtrVT = TLI->getPointerTy(DL);
326 Type *RetTy = CB.getFunctionType()->getReturnType();
327
328 O << "prototype_" << UniqueCallSite << " : .callprototype ";
329
330 if (RetTy->isVoidTy() || RetTy->isEmptyTy()) {
331 O << "()";
332 } else {
333 O << "(";
334 if (shouldPassAsArray(RetTy)) {
335 const Align RetAlign =
336 getPTXParamAlign(&CB, RetTy, AttributeList::ReturnIndex, DL);
337 O << ".param .align " << RetAlign.value() << " .b8 _["
338 << DL.getTypeAllocSize(RetTy) << "]";
339 } else if (RetTy->isFloatingPointTy() || RetTy->isIntegerTy()) {
340 unsigned size = 0;
341 if (auto *ITy = dyn_cast<IntegerType>(RetTy)) {
342 size = ITy->getBitWidth();
343 } else {
344 assert(RetTy->isFloatingPointTy() &&
345 "Floating point type expected here");
346 size = RetTy->getPrimitiveSizeInBits();
347 }
348 // PTX ABI requires all scalar return values to be at least 32
349 // bits in size. fp16 normally uses .b16 as its storage type in
350 // PTX, so its size must be adjusted here, too.
352
353 O << ".param .b" << size << " _";
354 } else if (isa<PointerType>(RetTy)) {
355 O << ".param .b" << PtrVT.getSizeInBits() << " _";
356 } else {
357 llvm_unreachable("Unknown return type");
358 }
359 O << ") ";
360 }
361 O << "_ (";
362
363 auto MakeArg = [&](const unsigned I) {
364 Type *Ty = CB.getArgOperand(I)->getType();
365
366 if (CB.paramHasAttr(I, Attribute::ByVal)) {
367 // Indirect calls need strict ABI alignment so we disable optimizations by
368 // not providing a function to optimize.
369 Type *ETy = CB.getParamByValType(I);
370 // Mirror the byval alignment computed by SelectionDAGBuilder: prefer an
371 // explicit stack/param alignment, otherwise fall back to the byval type
372 // alignment.
373 MaybeAlign InitialAlign = CB.getParamStackAlign(I);
374 if (!InitialAlign)
375 InitialAlign = CB.getParamAlign(I);
376 Align ByValAlign =
377 InitialAlign.value_or(TLI->getByValTypeAlignment(ETy, DL));
378 Align ParamByValAlign =
379 getDeviceByValParamAlign(/*F=*/nullptr, ETy, ByValAlign, DL);
380
381 O << ".param .align " << ParamByValAlign.value() << " .b8 _["
382 << DL.getTypeAllocSize(ETy) << "]";
383 return;
384 }
385
386 if (shouldPassAsArray(Ty)) {
387 Align ParamAlign =
388 getPTXParamAlign(&CB, Ty, I + AttributeList::FirstArgIndex, DL);
389 O << ".param .align " << ParamAlign.value() << " .b8 _["
390 << DL.getTypeAllocSize(Ty) << "]";
391 return;
392 }
393 // scalar type
394 unsigned sz = 0;
395 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
396 sz = promoteScalarArgumentSize(ITy->getBitWidth());
397 } else if (isa<PointerType>(Ty)) {
398 sz = PtrVT.getSizeInBits();
399 } else {
400 sz = Ty->getPrimitiveSizeInBits();
401 }
402 O << ".param .b" << sz << " _";
403 };
404
405 const FunctionType *FTy = CB.getFunctionType();
406 const unsigned NumArgs = FTy->getNumParams();
407
408 // Zero-sized arguments (e.g. empty structs) are not passed and so do not
409 // appear in the prototype.
410 const auto NonEmptyArgs = make_filter_range(seq(NumArgs), [&](unsigned I) {
411 return !CB.getArgOperand(I)->getType()->isEmptyTy();
412 });
413
414 interleave(NonEmptyArgs, O, MakeArg, ", ");
415
416 if (FTy->isVarArg() && CB.arg_size() > NumArgs)
417 O << (NonEmptyArgs.empty() ? "" : ",") << " .param .align "
418 << STI.getMaxRequiredAlignment() << " .b8 _[]";
419
420 O << ")";
421 if (shouldEmitPTXNoReturn(&CB, TM))
422 O << " .noreturn";
423 O << ";\n";
424}
425
426// Return true if MBB is the header of a loop marked with
427// llvm.loop.unroll.disable or llvm.loop.unroll.count=1.
428bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
429 const MachineBasicBlock &MBB) const {
430 MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
431 // We insert .pragma "nounroll" only to the loop header.
432 if (!LI.isLoopHeader(&MBB))
433 return false;
434
435 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
436 // we iterate through each back edge of the loop with header MBB, and check
437 // whether its metadata contains llvm.loop.unroll.disable.
438 for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
439 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
440 // Edges from other loops to MBB are not back edges.
441 continue;
442 }
443 if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
444 if (MDNode *LoopID =
445 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
446 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
447 return true;
448 if (MDNode *UnrollCountMD =
449 GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
450 if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
451 ->isOne())
452 return true;
453 }
454 }
455 }
456 }
457 return false;
458}
459
// Emits a `.pragma "nounroll";` line at the start of MBB when MBB is the
// header of a loop for which isLoopHeaderOfNoUnroll returns true.
// NOTE(review): listing line 461 (the first statement of the body) is absent
// from this listing.
460void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
462 if (isLoopHeaderOfNoUnroll(MBB))
463 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
464}
465
// Emits the textual header of the current function: linkage directive,
// `.entry`/`.func` keyword, return value (non-kernels only), symbol name,
// parameter list and kernel directives, followed by the opening brace and the
// virtual-register declarations.
// NOTE(review): the signature (listing line 466), the condition guarding
// `.noreturn` (line 494) and the statement after the final comment (line 505)
// are absent from this listing.
467 SmallString<128> Str;
468 raw_svector_ostream O(Str);
469
 // Module-level globals are emitted once, just before the first function.
470 if (!GlobalsEmitted) {
471 emitGlobals(*MF->getFunction().getParent());
472 GlobalsEmitted = true;
473 }
474
475 // Set up
476 MRI = &MF->getRegInfo();
477 F = &MF->getFunction();
478 emitLinkageDirective(F, O);
479 if (isKernelFunction(*F))
480 O << ".entry ";
481 else {
482 O << ".func ";
483 printReturnValStr(*MF, O);
484 }
485
486 CurrentFnSym->print(O, MAI);
487
488 emitFunctionParamList(F, O);
489 O << "\n";
490
491 if (isKernelFunction(*F))
492 emitKernelFunctionDirectives(*F, O);
493
495 O << ".noreturn";
496
 // Flush the accumulated header text in one piece.
497 OutStreamer->emitRawText(O.str());
498
 // Start this function with an empty virtual-register mapping.
499 VRegMapping.clear();
500 // Emit open brace for function body.
501 OutStreamer->emitRawText(StringRef("{\n"));
502 setAndEmitFunctionVirtualRegisters(*MF);
503 encodeDebugInfoRegisterNumbers(*MF);
504 // Emit initial .loc debug directive for correct relocation symbol data.
506}
507
// Runs the generic AsmPrinter on F, then closes the function body with "}".
// Returns whatever AsmPrinter::runOnMachineFunction returned.
// NOTE(review): the signature line (listing line 508) is absent from this
// listing.
509 bool Result = AsmPrinter::runOnMachineFunction(F);
510 // Emit closing brace for the body of function F.
511 // The closing brace must be emitted here because we need to emit additional
512 // debug labels/data after the last basic block.
513 // It cannot be emitted earlier because there is no function that runs once
514 // emission of the function body has finished.
515 OutStreamer->emitRawText(StringRef("}\n"));
516 return Result;
517}
518
// Emits, as one raw text chunk, the demoted variables of the current function
// followed by one call prototype per entry recorded in the function's
// NVPTXMachineFunctionInfo.
// NOTE(review): the signature and the declaration of `Str` (listing lines
// 519-520) are absent from this listing.
521 raw_svector_ostream O(Str);
522 emitDemotedVars(&MF->getFunction(), O);
523
524 const auto *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
 // Each entry pairs a prototype ID with the call it describes.
525 for (const auto &[Id, CB] : MFI->getCallPrototypes())
526 emitCallPrototype(*CB, Id, O);
527
528 OutStreamer->emitRawText(O.str());
529}
530
// Drops the per-function virtual-register mapping.
// NOTE(review): the signature line (listing line 531) is absent from this
// listing.
532 VRegMapping.clear();
533}
534
// Returns the MCSymbol named by Str, creating it in OutContext if needed.
// NOTE(review): the signature and the code that builds `Str` (listing lines
// 535-537) are absent from this listing.
538 return OutContext.getOrCreateSymbol(Str);
539}
540
// Emits an "implicit-def: <reg>" comment for the register defined by MI's
// first operand, followed by a blank line. Non-virtual registers are named
// through the subtarget's register info.
// NOTE(review): the second half of the virtual-register AddComment call
// (listing line 545) is absent from this listing.
541void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
542 Register RegNo = MI->getOperand(0).getReg();
543 if (RegNo.isVirtual()) {
544 OutStreamer->AddComment(Twine("implicit-def: ") +
546 } else {
547 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
548 OutStreamer->AddComment(Twine("implicit-def: ") +
549 STI.getRegisterInfo()->getName(RegNo));
550 }
551 OutStreamer->addBlankLine();
552}
553
// Prints the kernel performance-tuning directives for F to O: .reqntid,
// .maxntid, .minnctapersm and .maxnreg when the corresponding property is
// present, plus the cluster-related directives when the subtarget is sm_90 or
// newer.
// NOTE(review): the second argument of each formatv call (listing lines 562,
// 567, 595) and the definition of `BlocksAreClusters` (line 582) are absent
// from this listing.
554void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
555 raw_ostream &O) const {
556 // If the NVVM IR has some of reqntid* specified, then output
557 // the reqntid directive, and set the unspecified ones to 1.
558 // If none of Reqntid* is specified, don't output reqntid directive.
559 const auto ReqNTID = getReqNTID(F);
560 if (!ReqNTID.empty())
561 O << formatv(".reqntid {0:$[, ]}\n",
563
564 const auto MaxNTID = getMaxNTID(F);
565 if (!MaxNTID.empty())
566 O << formatv(".maxntid {0:$[, ]}\n",
568
569 if (const auto Mincta = getMinCTASm(F))
570 O << ".minnctapersm " << *Mincta << "\n";
571
572 if (const auto Maxnreg = getMaxNReg(F))
573 O << ".maxnreg " << *Maxnreg << "\n";
574
575 // .maxclusterrank directive requires SM_90 or higher, make sure that we
576 // filter it out for lower SM versions, as it causes a hard ptxas crash.
577 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
578 const NVPTXSubtarget *STI = &NTM.getSubtarget<NVPTXSubtarget>(F);
579
580 if (STI->getSmVersion() >= 90) {
581 const auto ClusterDim = getClusterDim(F);
583
584 if (!ClusterDim.empty()) {
585
586 if (!BlocksAreClusters)
587 O << ".explicitcluster\n";
588
 // The cluster dimensions must be either all non-zero or all zero.
589 if (ClusterDim[0] != 0) {
590 assert(llvm::all_of(ClusterDim, not_equal_to(0)) &&
591 "cluster_dim_x != 0 implies cluster_dim_y and cluster_dim_z "
592 "should be non-zero as well");
593
594 O << formatv(".reqnctapercluster {0:$[, ]}\n",
596 } else {
597 assert(llvm::all_of(ClusterDim, equal_to(0)) &&
598 "cluster_dim_x == 0 implies cluster_dim_y and cluster_dim_z "
599 "should be 0 as well");
600 }
601 }
602
 // .blocksareclusters is emitted only when reqntid and cluster_dim are both
 // present and the PTX version is at least 9.0; otherwise a diagnostic is
 // reported and the directive is left out.
603 if (BlocksAreClusters) {
604 LLVMContext &Ctx = F.getContext();
605 if (ReqNTID.empty() || ClusterDim.empty())
606 Ctx.diagnose(DiagnosticInfoUnsupported(
607 F, "blocksareclusters requires reqntid and cluster_dim attributes",
608 F.getSubprogram()));
609 else if (STI->getPTXVersion() < 90)
610 Ctx.diagnose(DiagnosticInfoUnsupported(
611 F, "blocksareclusters requires PTX version >= 9.0",
612 F.getSubprogram()));
613 else
614 O << ".blocksareclusters\n";
615 }
616
617 if (const auto Maxclusterrank = getMaxClusterRank(F))
618 O << ".maxclusterrank " << *Maxclusterrank << "\n";
619 }
620}
621
622std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
623 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
624
625 std::string Name;
626 raw_string_ostream NameStr(Name);
627
628 VRegRCMap::const_iterator I = VRegMapping.find(RC);
629 assert(I != VRegMapping.end() && "Bad register class");
630 const DenseMap<unsigned, unsigned> &RegMap = I->second;
631
632 VRegMap::const_iterator VI = RegMap.find(Reg);
633 assert(VI != RegMap.end() && "Bad virtual register");
634 unsigned MappedVR = VI->second;
635
636 NameStr << getNVPTXRegClassStr(RC) << MappedVR;
637
638 return Name;
639}
640
641void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
642 raw_ostream &O) {
643 O << getVirtualRegisterName(vr);
644}
645
// Emits a declaration for the alias GA under the alias's own symbol name,
// using the aliased function F for everything else. The error strings show
// that the aliasee must be a non-kernel function definition and that weak
// aliases are rejected.
// NOTE(review): the definition of `F` (listing line 648), the call that
// carries the first error string (line 650) and the end of the linkage
// condition (line 654) are absent from this listing.
646void NVPTXAsmPrinter::emitAliasDeclaration(const GlobalAlias *GA,
647 raw_ostream &O) {
649 if (!F || isKernelFunction(*F) || F->isDeclaration())
651 "NVPTX aliasee must be a non-kernel function definition");
652
653 if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() ||
655 report_fatal_error("NVPTX aliasee must not be '.weak'");
656
657 emitDeclarationWithName(F, getSymbol(GA), O);
658}
659
660void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
661 emitDeclarationWithName(F, getSymbol(F), O);
662}
663
// Prints a forward declaration of F to O, using S as the printed name:
// linkage directive, `.entry` or `.func`, return value, name, parameter list,
// and a terminating ";".
// NOTE(review): the condition guarding `.noreturn` (listing line 676) is
// absent from this listing.
664void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S,
665 raw_ostream &O) {
666 emitLinkageDirective(F, O);
667 if (isKernelFunction(*F))
668 O << ".entry ";
669 else
670 O << ".func ";
671 printReturnValStr(F, O);
672 S->print(O, MAI);
673 O << "\n";
674 emitFunctionParamList(F, O);
675 O << "\n";
677 O << ".noreturn";
678 O << ";\n";
679}
680
// Reports whether constant C is used in the definition of a global variable.
// A null C yields false; a global named "llvm.used" does not count.
// NOTE(review): the cast that introduces `GV` (listing line 685) and the
// condition inside the users loop (line 690) are absent from this listing;
// presumably the loop recurses on each Constant user - confirm against the
// full source.
681static bool usedInGlobalVarDef(const Constant *C) {
682 if (!C)
683 return false;
684
686 return GV->getName() != "llvm.used";
687
688 for (const User *U : C->users())
689 if (const Constant *C = dyn_cast<Constant>(U))
691 return true;
692
693 return false;
694}
695
696static bool usedInOneFunc(const User *U, Function const *&OneFunc) {
697 if (const GlobalVariable *OtherGV = dyn_cast<GlobalVariable>(U))
698 if (OtherGV->getName() == "llvm.used")
699 return true;
700
701 if (const Instruction *I = dyn_cast<Instruction>(U)) {
702 if (const Function *CurFunc = I->getFunction()) {
703 if (OneFunc && (CurFunc != OneFunc))
704 return false;
705 OneFunc = CurFunc;
706 return true;
707 }
708 return false;
709 }
710
711 for (const User *UU : U->users())
712 if (!usedInOneFunc(UU, OneFunc))
713 return false;
714
715 return true;
716}
717
718/* Find out if a global variable can be demoted to local scope.
719 * Currently, this is valid for CUDA shared variables, which have local
720 * scope and global lifetime. So the conditions to check are :
721 * 1. Is the global variable in shared address space?
722 * 2. Does it have local linkage?
723 * 3. Is the global variable referenced only in one function?
724 *
 * On success, the single referencing function is stored in f. f is left
 * untouched when the function returns false.
 * NOTE(review): the condition of the second early return (listing line 728)
 * is absent from this listing.
 */
725static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f) {
726 if (!GV->hasLocalLinkage())
727 return false;
729 return false;
730
731 const Function *oneFunc = nullptr;
732
733 bool flag = usedInOneFunc(GV, oneFunc);
734 if (!flag)
735 return false;
 // usedInOneFunc can succeed without ever seeing an instruction, in which
 // case there is no function to demote into.
736 if (!oneFunc)
737 return false;
738 f = oneFunc;
739 return true;
740}
741
742static bool useFuncSeen(const Constant *C,
743 const SmallPtrSetImpl<const Function *> &SeenSet) {
744 for (const User *U : C->users()) {
745 if (const Constant *cu = dyn_cast<Constant>(U)) {
746 if (useFuncSeen(cu, SeenSet))
747 return true;
748 } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
749 if (const Function *Caller = I->getFunction())
750 if (SeenSet.contains(Caller))
751 return true;
752 }
753 }
754 return false;
755}
756
757void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
758 SmallPtrSet<const Function *, 32> SeenSet;
759 for (const Function &F : M) {
760 if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
761 emitDeclaration(&F, O);
762 continue;
763 }
764
765 if (F.isDeclaration()) {
766 if (F.use_empty())
767 continue;
768 if (F.getIntrinsicID())
769 continue;
770 // An unrecognized intrinsic would produce an invalid PTX declaration. Let
771 // the user know that, and skip it.
772 if (F.isIntrinsic()) {
773 LLVMContext &Ctx = F.getContext();
774 Ctx.diagnose(DiagnosticInfoUnsupported(
775 F, "unknown intrinsic '" + F.getName() +
776 "' cannot be lowered by the NVPTX backend"));
777 continue;
778 }
779 emitDeclaration(&F, O);
780 continue;
781 }
782 for (const User *U : F.users()) {
783 if (const Constant *C = dyn_cast<Constant>(U)) {
784 if (usedInGlobalVarDef(C)) {
785 // The use is in the initialization of a global variable
786 // that is a function pointer, so print a declaration
787 // for the original function
788 emitDeclaration(&F, O);
789 break;
790 }
791 // Emit a declaration of this function if the function that
792 // uses this constant expr has already been seen.
793 if (useFuncSeen(C, SeenSet)) {
794 emitDeclaration(&F, O);
795 break;
796 }
797 }
798
799 if (!isa<Instruction>(U))
800 continue;
801 const Function *Caller = cast<Instruction>(U)->getFunction();
802 if (!Caller)
803 continue;
804
805 // If a caller has already been seen, then the caller is
806 // appearing in the module before the callee. so print out
807 // a declaration for the callee.
808 if (SeenSet.contains(Caller)) {
809 emitDeclaration(&F, O);
810 break;
811 }
812 }
813 SeenSet.insert(&F);
814 }
815 for (const GlobalAlias &GA : M.aliases())
816 emitAliasDeclaration(&GA, O);
817}
818
819void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
820 // Construct a default subtarget off of the TargetMachine defaults. The
821 // rest of NVPTX isn't friendly to change subtargets per function and
822 // so the default TargetMachine will have all of the options.
823 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
824 const NVPTXSubtarget *STI = NTM.getSubtargetImpl();
825
826 // Emit header before any dwarf directives are emitted below.
827 emitHeader(M, *STI);
828}
829
830/// Create NVPTX-specific DwarfDebug handler.
834
// Module-level initialization: rejects modules that contain aliases when the
// default subtarget is below PTX 6.3 or sm_30, runs the base-class
// initialization, and resets the GlobalsEmitted flag.
// NOTE(review): the signature line (listing line 835) is absent from this
// listing.
836 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
837 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
838 if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
839 report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
840
841 // We need to call the parent's one explicitly.
842 bool Result = AsmPrinter::doInitialization(M);
843
844 GlobalsEmitted = false;
845
846 return Result;
847}
848
// Emits, as one raw text chunk, the function declarations of M followed by its
// module-level global variables in dependency order.
// NOTE(review): the declarations of `Globals`, `GVVisited` and `GVVisiting`
// (listing lines 860-862) are absent from this listing.
849void NVPTXAsmPrinter::emitGlobals(const Module &M) {
850 SmallString<128> Str2;
851 raw_svector_ostream OS2(Str2);
852
853 emitDeclarations(M, OS2);
854
855 // As ptxas does not support forward references of globals, we need to first
856 // sort the list of module-level globals in def-use order. We visit each
857 // global variable in order, and ensure that we emit it *after* its dependent
858 // globals. We use a little extra memory maintaining both a set and a list to
859 // have fast searches while maintaining a strict ordering.
863
864 // Visit each global variable, in order
865 for (const GlobalVariable &I : M.globals())
866 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
867
868 assert(GVVisited.size() == M.global_size() && "Missed a global variable");
869 assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
870
871 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
872 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
873
874 // Print out module-level global variables in proper order
875 for (const GlobalVariable *GV : Globals)
876 printModuleLevelGV(GV, OS2, /*ProcessDemoted=*/false, STI);
877
878 OS2 << '\n';
879
880 OutStreamer->emitRawText(OS2.str());
881}
882
// Emits a `.alias <alias>, <aliasee>;` line for GA as raw text. The aliasee
// name is taken from GA.getAliaseeObject().
// NOTE(review): the declaration of `Str` (listing line 884) is absent from
// this listing.
883void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
885 raw_svector_ostream OS(Str);
886
887 MCSymbol *Name = getSymbol(&GA);
888
889 OS << ".alias " << Name->getName() << ", " << GA.getAliaseeObject()->getName()
890 << ";\n";
891
892 OutStreamer->emitRawText(OS.str());
893}
894
895NVPTXTargetStreamer *NVPTXAsmPrinter::getTargetStreamer() const {
896 return static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
897}
898
// Scans the debug compile units of M and returns true as soon as one has an
// emission kind that falls in the `return true` group of the switch; returns
// false if none does.
// NOTE(review): the `case` labels of the switch (listing lines 902-903 and
// 905-906) are absent from this listing, so which emission kinds count as
// full debug info cannot be confirmed here.
899static bool hasFullDebugInfo(Module &M) {
900 for (DICompileUnit *CU : M.debug_compile_units()) {
901 switch(CU->getEmissionKind()) {
904 break;
907 return true;
908 }
909 }
910
911 return false;
912}
913
// Emits the PTX file header through the target streamer: banner, version
// directive, target directive and address-size directive, in that order.
// NOTE(review): the final argument of emitTargetDirective (listing line 926)
// is absent from this listing.
914void NVPTXAsmPrinter::emitHeader(Module &M, const NVPTXSubtarget &STI) {
915 auto *TS = getTargetStreamer();
916
917 TS->emitBanner();
918
919 const unsigned PTXVersion = STI.getPTXVersion();
920 TS->emitVersionDirective(PTXVersion);
921
922 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
 // The flag is set exactly when the driver interface is NVCL.
923 bool TexModeIndependent = NTM.getDrvInterface() == NVPTX::NVCL;
924
925 TS->emitTargetDirective(STI.getTargetName(), TexModeIndependent,
927 TS->emitAddressSizeDirective(M.getDataLayout().getPointerSizeInBits());
928}
929
// Module-level finalization: emits the globals if no function triggered that
// earlier, runs the base-class finalization, and, when debug info is present,
// closes the last section and appends an empty .debug_macinfo section.
// Returns the result of AsmPrinter::doFinalization.
// NOTE(review): the signature (listing line 930) and the statements at
// listing lines 941 and 953 are absent from this listing.
931 // If we did not emit any functions, then the global declarations have not
932 // yet been emitted.
933 if (!GlobalsEmitted) {
934 emitGlobals(M);
935 GlobalsEmitted = true;
936 }
937
938 // call doFinalization
939 bool ret = AsmPrinter::doFinalization(M);
940
942
943 auto *TS =
944 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer());
945 // Close the last emitted section
946 if (hasDebugInfo()) {
947 TS->closeLastSection();
948 // Emit empty .debug_macinfo section for better support of the empty files.
949 OutStreamer->emitRawText("\t.section\t.debug_macinfo\t{\t}");
950 }
951
952 // Output last DWARF .file directives, if any.
954
955 return ret;
956}
957
958// This function emits appropriate linkage directives for
959// functions and global variables.
960//
961// extern function declaration -> .extern
962// extern function definition -> .visible
963// external global variable with init -> .visible
964// external without init -> .extern
965// appending -> not allowed, assert.
966// for any linkage other than
967// internal, private, linker_private,
968// linker_private_weak, linker_private_weak_def_auto,
969// we emit -> .weak.
970
971void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
972 raw_ostream &O) {
973 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
974 if (V->hasExternalLinkage()) {
975 if (const auto *GVar = dyn_cast<GlobalVariable>(V))
976 O << (GVar->hasInitializer() ? ".visible " : ".extern ");
977 else if (V->isDeclaration())
978 O << ".extern ";
979 else
980 O << ".visible ";
981 } else if (V->hasAppendingLinkage()) {
982 report_fatal_error("Symbol '" + (V->hasName() ? V->getName() : "") +
983 "' has unsupported appending linkage type");
984 } else if (!V->hasInternalLinkage() && !V->hasPrivateLinkage()) {
985 O << ".weak ";
986 }
987 }
988}
989
990void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
991 raw_ostream &O, bool ProcessDemoted,
992 const NVPTXSubtarget &STI) {
993 // Skip meta data
994 if (GVar->hasSection())
995 if (GVar->getSection() == "llvm.metadata")
996 return;
997
998 // Skip LLVM intrinsic global variables
999 if (GVar->getName().starts_with("llvm.") ||
1000 GVar->getName().starts_with("nvvm."))
1001 return;
1002
1003 const DataLayout &DL = getDataLayout();
1004
1005 // GlobalVariables are always constant pointers themselves.
1006 Type *ETy = GVar->getValueType();
1007
1008 if (GVar->hasExternalLinkage()) {
1009 if (GVar->hasInitializer())
1010 O << ".visible ";
1011 else
1012 O << ".extern ";
1013 } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() &&
1015 O << ".common ";
1016 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1018 GVar->hasCommonLinkage()) {
1019 O << ".weak ";
1020 }
1021
1022 const PTXOpaqueType OpaqueType = getPTXOpaqueType(*GVar);
1023
1024 if (OpaqueType == PTXOpaqueType::Texture) {
1025 O << ".global .texref " << getTextureName(*GVar) << ";\n";
1026 return;
1027 }
1028
1029 if (OpaqueType == PTXOpaqueType::Surface) {
1030 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1031 return;
1032 }
1033
1034 if (GVar->isDeclaration()) {
1035 // (extern) declarations, no definition or initializer
1036 // Currently the only known declaration is for an automatic __local
1037 // (.shared) promoted to global.
1038 emitPTXGlobalVariable(GVar, O, STI);
1039 O << ";\n";
1040 return;
1041 }
1042
1043 if (OpaqueType == PTXOpaqueType::Sampler) {
1044 O << ".global .samplerref " << getSamplerName(*GVar);
1045
1046 const Constant *Initializer = nullptr;
1047 if (GVar->hasInitializer())
1048 Initializer = GVar->getInitializer();
1049 const ConstantInt *CI = nullptr;
1050 if (Initializer)
1051 CI = dyn_cast<ConstantInt>(Initializer);
1052 if (CI) {
1053 unsigned sample = CI->getZExtValue();
1054
1055 O << " = { ";
1056
1057 for (int i = 0,
1058 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1059 i < 3; i++) {
1060 O << "addr_mode_" << i << " = ";
1061 switch (addr) {
1062 case 0:
1063 O << "wrap";
1064 break;
1065 case 1:
1066 O << "clamp_to_border";
1067 break;
1068 case 2:
1069 O << "clamp_to_edge";
1070 break;
1071 case 3:
1072 O << "wrap";
1073 break;
1074 case 4:
1075 O << "mirror";
1076 break;
1077 }
1078 O << ", ";
1079 }
1080 O << "filter_mode = ";
1081 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1082 case 0:
1083 O << "nearest";
1084 break;
1085 case 1:
1086 O << "linear";
1087 break;
1088 case 2:
1089 llvm_unreachable("Anisotropic filtering is not supported");
1090 default:
1091 O << "nearest";
1092 break;
1093 }
1094 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1095 O << ", force_unnormalized_coords = 1";
1096 }
1097 O << " }";
1098 }
1099
1100 O << ";\n";
1101 return;
1102 }
1103
1104 if (GVar->hasPrivateLinkage()) {
1105 if (GVar->getName().starts_with("unrollpragma"))
1106 return;
1107
1108 // FIXME - need better way (e.g. Metadata) to avoid generating this global
1109 if (GVar->getName().starts_with("filename"))
1110 return;
1111 if (GVar->use_empty())
1112 return;
1113 }
1114
1115 const Function *DemotedFunc = nullptr;
1116 if (!ProcessDemoted && canDemoteGlobalVar(GVar, DemotedFunc)) {
1117 O << "// " << GVar->getName() << " has been demoted\n";
1118 localDecls[DemotedFunc].push_back(GVar);
1119 return;
1120 }
1121
1122 O << ".";
1123 emitPTXAddressSpace(GVar->getAddressSpace(), O);
1124
1125 if (isManaged(*GVar)) {
1126 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
1128 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1129 O << " .attribute(.managed)";
1130 }
1131
1132 O << " .align "
1133 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
1134
1135 if (ETy->isPointerTy() || ((ETy->isIntegerTy() || ETy->isFloatingPointTy()) &&
1136 ETy->getScalarSizeInBits() <= 64)) {
1137 O << " .";
1138 // Special case: ABI requires that we use .u8 for predicates
1139 if (ETy->isIntegerTy(1))
1140 O << "u8";
1141 else
1142 O << getPTXFundamentalTypeStr(ETy, false);
1143 O << " ";
1144 getSymbol(GVar)->print(O, MAI);
1145
1146 // Ptx allows variable initilization only for constant and global state
1147 // spaces.
1148 if (GVar->hasInitializer()) {
1149 if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1150 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1151 const Constant *Initializer = GVar->getInitializer();
1152 // 'undef' is treated as there is no value specified.
1153 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1154 O << " = ";
1155 printScalarConstant(Initializer, O);
1156 }
1157 } else {
1158 // The frontend adds zero-initializer to device and constant variables
1159 // that don't have an initial value, and UndefValue to shared
1160 // variables, so skip warning for this case.
1161 if (!GVar->getInitializer()->isNullValue() &&
1162 !isa<UndefValue>(GVar->getInitializer())) {
1163 report_fatal_error("initial value of '" + GVar->getName() +
1164 "' is not allowed in addrspace(" +
1165 Twine(GVar->getAddressSpace()) + ")");
1166 }
1167 }
1168 }
1169 } else {
1170 // Although PTX has direct support for struct type and array type and
1171 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1172 // targets that support these high level field accesses. Structs, arrays
1173 // and vectors are lowered into arrays of bytes.
1174 switch (ETy->getTypeID()) {
1175 case Type::IntegerTyID: // Integers larger than 64 bits
1176 case Type::FP128TyID:
1177 case Type::StructTyID:
1178 case Type::ArrayTyID:
1179 case Type::FixedVectorTyID: {
1180 const uint64_t ElementSize = DL.getTypeStoreSize(ETy);
1181 // Ptx allows variable initilization only for constant and
1182 // global state spaces.
1183 if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1184 (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1185 GVar->hasInitializer()) {
1186 const Constant *Initializer = GVar->getInitializer();
1187 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1188 AggBuffer aggBuffer(ElementSize, *this);
1189 bufferAggregateConstant(Initializer, &aggBuffer);
1190 if (aggBuffer.numSymbols()) {
1191 const unsigned int ptrSize = MAI.getCodePointerSize();
1192 if (ElementSize % ptrSize ||
1193 !aggBuffer.allSymbolsAligned(ptrSize)) {
1194 // Print in bytes and use the mask() operator for pointers.
1195 if (!STI.hasMaskOperator())
1197 "initialized packed aggregate with pointers '" +
1198 GVar->getName() +
1199 "' requires at least PTX ISA version 7.1");
1200 O << " .u8 ";
1201 getSymbol(GVar)->print(O, MAI);
1202 O << "[" << ElementSize << "] = {";
1203 aggBuffer.printBytes(O);
1204 O << "}";
1205 } else {
1206 O << " .u" << ptrSize * 8 << " ";
1207 getSymbol(GVar)->print(O, MAI);
1208 O << "[" << ElementSize / ptrSize << "] = {";
1209 aggBuffer.printWords(O);
1210 O << "}";
1211 }
1212 } else {
1213 O << " .b8 ";
1214 getSymbol(GVar)->print(O, MAI);
1215 O << "[" << ElementSize << "] = {";
1216 aggBuffer.printBytes(O);
1217 O << "}";
1218 }
1219 } else {
1220 O << " .b8 ";
1221 getSymbol(GVar)->print(O, MAI);
1222 if (ElementSize)
1223 O << "[" << ElementSize << "]";
1224 }
1225 } else {
1226 O << " .b8 ";
1227 getSymbol(GVar)->print(O, MAI);
1228 if (ElementSize)
1229 O << "[" << ElementSize << "]";
1230 }
1231 break;
1232 }
1233 default:
1234 llvm_unreachable("type not supported yet");
1235 }
1236 }
1237 O << ";\n";
1238}
1239
1240void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
1241 const Value *v = Symbols[nSym];
1242 const Value *v0 = SymbolsBeforeStripping[nSym];
1243 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1244 MCSymbol *Name = AP.getSymbol(GVar);
1246 // Is v0 a generic pointer?
1247 bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
1248 if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
1249 os << "generic(";
1250 Name->print(os, AP.MAI);
1251 os << ")";
1252 } else {
1253 Name->print(os, AP.MAI);
1254 }
1255 } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
1256 const MCExpr *Expr = AP.lowerConstantForGV(CExpr, false);
1257 AP.printMCExpr(*Expr, os);
1258 } else
1259 llvm_unreachable("symbol type unknown");
1260}
1261
1262void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
1263 unsigned int ptrSize = AP.MAI.getCodePointerSize();
1264 // Do not emit trailing zero initializers. They will be zero-initialized by
1265 // ptxas. This saves on both space requirements for the generated PTX and on
1266 // memory use by ptxas. (See:
1267 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space)
1268 unsigned int InitializerCount = Size;
1269 // TODO: symbols make this harder, but it would still be good to trim trailing
1270 // 0s for aggs with symbols as well.
1271 if (numSymbols() == 0)
1272 while (InitializerCount >= 1 && !buffer[InitializerCount - 1])
1273 InitializerCount--;
1274
1275 symbolPosInBuffer.push_back(InitializerCount);
1276 unsigned int nSym = 0;
1277 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1278 for (unsigned int pos = 0; pos < InitializerCount;) {
1279 if (pos)
1280 os << ", ";
1281 if (pos != nextSymbolPos) {
1282 os << (unsigned int)buffer[pos];
1283 ++pos;
1284 continue;
1285 }
1286 // Generate a per-byte mask() operator for the symbol, which looks like:
1287 // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
1288 // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
1289 std::string symText;
1290 llvm::raw_string_ostream oss(symText);
1291 printSymbol(nSym, oss);
1292 for (unsigned i = 0; i < ptrSize; ++i) {
1293 if (i)
1294 os << ", ";
1295 llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
1296 os << "(" << symText << ")";
1297 }
1298 pos += ptrSize;
1299 nextSymbolPos = symbolPosInBuffer[++nSym];
1300 assert(nextSymbolPos >= pos);
1301 }
1302}
1303
1304void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
1305 unsigned int ptrSize = AP.MAI.getCodePointerSize();
1306 symbolPosInBuffer.push_back(Size);
1307 unsigned int nSym = 0;
1308 unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
1309 assert(nextSymbolPos % ptrSize == 0);
1310 for (unsigned int pos = 0; pos < Size; pos += ptrSize) {
1311 if (pos)
1312 os << ", ";
1313 if (pos == nextSymbolPos) {
1314 printSymbol(nSym, os);
1315 nextSymbolPos = symbolPosInBuffer[++nSym];
1316 assert(nextSymbolPos % ptrSize == 0);
1317 assert(nextSymbolPos >= pos + ptrSize);
1318 } else if (ptrSize == 4)
1319 os << support::endian::read32le(&buffer[pos]);
1320 else
1321 os << support::endian::read64le(&buffer[pos]);
1322 }
1323}
1324
1325void NVPTXAsmPrinter::emitDemotedVars(const Function *F, raw_ostream &O) {
1326 auto It = localDecls.find(F);
1327 if (It == localDecls.end())
1328 return;
1329
1330 ArrayRef<const GlobalVariable *> GVars = It->second;
1331
1332 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1333 const NVPTXSubtarget &STI = *NTM.getSubtargetImpl();
1334
1335 for (const GlobalVariable *GV : GVars) {
1336 O << "\t// demoted variable\n\t";
1337 printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1338 }
1339}
1340
1341void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1342 raw_ostream &O) const {
1343 switch (AddressSpace) {
1345 O << "local";
1346 break;
1348 O << "global";
1349 break;
1351 O << "const";
1352 break;
1354 O << "shared";
1355 break;
1356 default:
1357 report_fatal_error("Bad address space found while emitting PTX: " +
1358 llvm::Twine(AddressSpace));
1359 break;
1360 }
1361}
1362
1363std::string
1364NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1365 switch (Ty->getTypeID()) {
1366 case Type::IntegerTyID: {
1367 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1368 if (NumBits == 1)
1369 return "pred";
1370 if (NumBits <= 64) {
1371 std::string name = "u";
1372 return name + utostr(NumBits);
1373 }
1374 llvm_unreachable("Integer too large");
1375 break;
1376 }
1377 case Type::BFloatTyID:
1378 case Type::HalfTyID:
1379 // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
1380 // PTX assembly.
1381 return "b16";
1382 case Type::FloatTyID:
1383 return "f32";
1384 case Type::DoubleTyID:
1385 return "f64";
1386 case Type::PointerTyID: {
1387 unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace());
1388 assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size");
1389
1390 if (PtrSize == 64)
1391 if (useB4PTR)
1392 return "b64";
1393 else
1394 return "u64";
1395 else if (useB4PTR)
1396 return "b32";
1397 else
1398 return "u32";
1399 }
1400 default:
1401 break;
1402 }
1403 llvm_unreachable("unexpected type");
1404}
1405
1406void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1407 raw_ostream &O,
1408 const NVPTXSubtarget &STI) {
1409 const DataLayout &DL = getDataLayout();
1410
1411 // GlobalVariables are always constant pointers themselves.
1412 Type *ETy = GVar->getValueType();
1413
1414 O << ".";
1415 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1416 if (isManaged(*GVar)) {
1417 if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30)
1419 ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1420
1421 O << " .attribute(.managed)";
1422 }
1423 O << " .align "
1424 << GVar->getAlign().value_or(DL.getPrefTypeAlign(ETy)).value();
1425
1426 // Special case for i128/fp128
1427 if (ETy->getScalarSizeInBits() == 128) {
1428 O << " .b8 ";
1429 getSymbol(GVar)->print(O, MAI);
1430 O << "[16]";
1431 return;
1432 }
1433
1434 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1435 O << " ." << getPTXFundamentalTypeStr(ETy) << " ";
1436 getSymbol(GVar)->print(O, MAI);
1437 return;
1438 }
1439
1440 int64_t ElementSize = 0;
1441
1442 // Although PTX has direct support for struct type and array type and LLVM IR
1443 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1444 // support these high level field accesses. Structs and arrays are lowered
1445 // into arrays of bytes.
1446 switch (ETy->getTypeID()) {
1447 case Type::StructTyID:
1448 case Type::ArrayTyID:
1450 ElementSize = DL.getTypeStoreSize(ETy);
1451 O << " .b8 ";
1452 getSymbol(GVar)->print(O, MAI);
1453 O << "[";
1454 if (ElementSize) {
1455 O << ElementSize;
1456 }
1457 O << "]";
1458 break;
1459 default:
1460 llvm_unreachable("type not supported yet");
1461 }
1462}
1463
1464void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1465 const DataLayout &DL = getDataLayout();
1466 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1467 const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1468 const NVPTXMachineFunctionInfo *MFI =
1469 MF ? MF->getInfo<NVPTXMachineFunctionInfo>() : nullptr;
1470
1471 bool IsFirst = true;
1472 const bool IsKernelFunc = isKernelFunction(*F);
1473
1474 // Zero-sized arguments (e.g. empty structs) do not produce a parameter.
1475 // Number the emitted parameters contiguously, skipping the zero-sized ones,
1476 // so that the names match those used in LowerFormalArguments and the
1477 // contiguous numbering used by callers (see LowerCall).
1478 const auto NonEmptyArgs =
1479 make_filter_range(F->args(), [](const Argument &Arg) {
1480 return !Arg.getType()->isEmptyTy();
1481 });
1482
1483 if (NonEmptyArgs.empty() && !F->isVarArg()) {
1484 O << "()";
1485 return;
1486 }
1487
1488 O << "(\n";
1489
1490 for (const auto &[ParamIndex, Arg] : enumerate(NonEmptyArgs)) {
1491 Type *Ty = Arg.getType();
1492 const std::string ParamSym = TLI->getParamName(F, ParamIndex);
1493
1494 if (!IsFirst)
1495 O << ",\n";
1496
1497 IsFirst = false;
1498
1499 // Handle image/sampler parameters
1500 if (IsKernelFunc) {
1501 const PTXOpaqueType ArgOpaqueType = getPTXOpaqueType(Arg);
1502 if (ArgOpaqueType != PTXOpaqueType::None) {
1503 const bool EmitImgPtr = !MFI || !MFI->checkImageHandleSymbol(ParamSym);
1504 O << "\t.param ";
1505 if (EmitImgPtr)
1506 O << ".u64 .ptr ";
1507
1508 switch (ArgOpaqueType) {
1510 O << ".samplerref ";
1511 break;
1513 O << ".texref ";
1514 break;
1516 O << ".surfref ";
1517 break;
1519 llvm_unreachable("handled above");
1520 }
1521 O << ParamSym;
1522 continue;
1523 }
1524 }
1525
1526 if (Arg.hasByValAttr()) {
1527 // param has byVal attribute.
1528 Type *ETy = Arg.getParamByValType();
1529 assert(ETy && "Param should have byval type");
1530
1531 // Print .param .align <a> .b8 .param[size];
1532 // <a> = optimal alignment for the element type; always multiple of
1533 // PAL.getParamAlignment
1534 // size = typeallocsize of element type
1535 const Align OptimalAlign =
1536 IsKernelFunc
1538 F, ETy, Arg.getArgNo() + AttributeList::FirstArgIndex, DL)
1539 : getDeviceByValParamAlign(F, ETy,
1540 Arg.getParamAlign().valueOrOne(), DL);
1541
1542 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1543 << "[" << DL.getTypeAllocSize(ETy) << "]";
1544 continue;
1545 }
1546
1547 if (shouldPassAsArray(Ty)) {
1548 // Just print .param .align <a> .b8 .param[size];
1549 // <a> = optimal alignment for the element type; always multiple of
1550 // PAL.getParamAlignment
1551 // size = typeallocsize of element type
1552 Align OptimalAlign = getPTXParamAlign(
1553 F, Ty, Arg.getArgNo() + AttributeList::FirstArgIndex, DL);
1554
1555 O << "\t.param .align " << OptimalAlign.value() << " .b8 " << ParamSym
1556 << "[" << DL.getTypeAllocSize(Ty) << "]";
1557
1558 continue;
1559 }
1560 // Just a scalar
1561 auto *PTy = dyn_cast<PointerType>(Ty);
1562 unsigned PTySizeInBits = 0;
1563 if (PTy) {
1564 PTySizeInBits =
1565 TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits();
1566 assert(PTySizeInBits && "Invalid pointer size");
1567 }
1568
1569 if (IsKernelFunc) {
1570 if (PTy) {
1571 O << "\t.param .u" << PTySizeInBits << " .ptr";
1572
1573 switch (PTy->getAddressSpace()) {
1574 default:
1575 break;
1577 O << " .global";
1578 break;
1580 O << " .shared";
1581 break;
1583 O << " .const";
1584 break;
1586 O << " .local";
1587 break;
1588 }
1589
1590 O << " .align " << Arg.getParamAlign().valueOrOne().value() << " "
1591 << ParamSym;
1592 continue;
1593 }
1594
1595 // non-pointer scalar to kernel func
1596 O << "\t.param .";
1597 // Special case: predicate operands become .u8 types
1598 if (Ty->isIntegerTy(1))
1599 O << "u8";
1600 else
1601 O << getPTXFundamentalTypeStr(Ty);
1602 O << " " << ParamSym;
1603 continue;
1604 }
1605 // Non-kernel function, just print .param .b<size> for ABI
1606 // and .reg .b<size> for non-ABI
1607 unsigned Size;
1608 if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
1609 Size = promoteScalarArgumentSize(ITy->getBitWidth());
1610 } else if (PTy) {
1611 assert(PTySizeInBits && "Invalid pointer size");
1612 Size = PTySizeInBits;
1613 } else
1615 O << "\t.param .b" << Size << " " << ParamSym;
1616 }
1617
1618 if (F->isVarArg()) {
1619 if (!IsFirst)
1620 O << ",\n";
1621 O << "\t.param .align " << STI.getMaxRequiredAlignment() << " .b8 "
1622 << TLI->getParamName(F, /* vararg */ -1) << "[]";
1623 }
1624
1625 O << "\n)";
1626}
1627
1628void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1629 const MachineFunction &MF) {
1630 SmallString<128> Str;
1631 raw_svector_ostream O(Str);
1632
1633 // Map the global virtual register number to a register class specific
1634 // virtual register number starting from 1 with that class.
1635 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1636
1637 // Emit the Fake Stack Object
1638 const MachineFrameInfo &MFI = MF.getFrameInfo();
1639 int64_t NumBytes = MFI.getStackSize();
1640 if (NumBytes) {
1641 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1642 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1643 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1644 O << "\t.reg .b64 \t%SP;\n"
1645 << "\t.reg .b64 \t%SPL;\n";
1646 } else {
1647 O << "\t.reg .b32 \t%SP;\n"
1648 << "\t.reg .b32 \t%SPL;\n";
1649 }
1650 }
1651
1652 // Go through all virtual registers to establish the mapping between the
1653 // global virtual
1654 // register number and the per class virtual register number.
1655 // We use the per class virtual register number in the ptx output.
1656 for (unsigned I : llvm::seq(MRI->getNumVirtRegs())) {
1658 if (MRI->use_empty(VR) && MRI->def_empty(VR))
1659 continue;
1660 auto &RCRegMap = VRegMapping[MRI->getRegClass(VR)];
1661 RCRegMap[VR] = RCRegMap.size() + 1;
1662 }
1663
1664 // Emit declaration of the virtual registers or 'physical' registers for
1665 // each register class
1666 for (const TargetRegisterClass *RC : TRI->regclasses()) {
1667 const unsigned N = VRegMapping[RC].size();
1668
1669 // Only declare those registers that may be used.
1670 if (N) {
1671 const StringRef RCName = getNVPTXRegClassName(RC);
1672 const StringRef RCStr = getNVPTXRegClassStr(RC);
1673 O << "\t.reg " << RCName << " \t" << RCStr << "<" << (N + 1) << ">;\n";
1674 }
1675 }
1676
1677 OutStreamer->emitRawText(O.str());
1678}
1679
1680/// Translate virtual register numbers in DebugInfo locations to their printed
1681/// encodings, as used by CUDA-GDB.
1682void NVPTXAsmPrinter::encodeDebugInfoRegisterNumbers(
1683 const MachineFunction &MF) {
1684 const NVPTXSubtarget &STI = MF.getSubtarget<NVPTXSubtarget>();
1685 const NVPTXRegisterInfo *registerInfo = STI.getRegisterInfo();
1686
1687 // Clear the old mapping, and add the new one. This mapping is used after the
1688 // printing of the current function is complete, but before the next function
1689 // is printed.
1690 registerInfo->clearDebugRegisterMap();
1691
1692 for (auto &classMap : VRegMapping) {
1693 for (auto &registerMapping : classMap.getSecond()) {
1694 auto reg = registerMapping.getFirst();
1695 registerInfo->addToDebugRegisterMap(reg, getVirtualRegisterName(reg));
1696 }
1697 }
1698}
1699
1700void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp,
1701 raw_ostream &O) const {
1702 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1703 bool ignored;
1704 unsigned int numHex;
1705 const char *lead;
1706
1707 if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1708 numHex = 8;
1709 lead = "0f";
1711 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1712 numHex = 16;
1713 lead = "0d";
1715 } else
1716 llvm_unreachable("unsupported fp type");
1717
1718 APInt API = APF.bitcastToAPInt();
1719 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1720}
1721
1722void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1723 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1724 O << CI->getValue();
1725 return;
1726 }
1727 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1728 printFPConstant(CFP, O);
1729 return;
1730 }
1731 if (isa<ConstantPointerNull>(CPV)) {
1732 O << "0";
1733 return;
1734 }
1735 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1736 const bool IsNonGenericPointer = GVar->getAddressSpace() != 0;
1737 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1738 O << "generic(";
1739 getSymbol(GVar)->print(O, MAI);
1740 O << ")";
1741 } else {
1742 getSymbol(GVar)->print(O, MAI);
1743 }
1744 return;
1745 }
1746 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1747 const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false);
1748 printMCExpr(*E, O);
1749 return;
1750 }
1751 llvm_unreachable("Not scalar type found in printScalarConstant()");
1752}
1753
1754void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1755 AggBuffer *AggBuffer) {
1756 const DataLayout &DL = getDataLayout();
1757 int AllocSize = DL.getTypeAllocSize(CPV->getType());
1758 if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1759 // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1760 // only the space allocated by CPV.
1761 AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1762 return;
1763 }
1764
1765 // Helper for filling AggBuffer with APInts.
1766 auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1767 size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1768 SmallVector<unsigned char, 16> Buf(NumBytes);
1769 // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the
1770 // input's bit width, and i1 arrays may not have a length that is a multuple
1771 // of 8. We handle the last byte separately, so we never request out of
1772 // bounds bits.
1773 for (unsigned I = 0; I < NumBytes - 1; ++I) {
1774 Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1775 }
1776 size_t LastBytePosition = (NumBytes - 1) * 8;
1777 size_t LastByteBits = Val.getBitWidth() - LastBytePosition;
1778 Buf[NumBytes - 1] =
1779 Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition);
1780 AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1781 };
1782
1783 switch (CPV->getType()->getTypeID()) {
1784 case Type::IntegerTyID:
1785 if (const auto *CI = dyn_cast<ConstantInt>(CPV)) {
1786 AddIntToBuffer(CI->getValue());
1787 break;
1788 }
1789 if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1790 if (const auto *CI =
1792 AddIntToBuffer(CI->getValue());
1793 break;
1794 }
1795 if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1796 Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1797 AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1798 AggBuffer->addZeros(AllocSize);
1799 break;
1800 }
1801 // A symbol-relative integer whose offset is applied outside the
1802 // ptrtoint, e.g. add(ptrtoint(@g), C). It can't fold to a ConstantInt
1803 // because it references a symbol; emit it through lowerConstantForGV, the
1804 // same path scalar symbol-relative integer globals use.
1805 AggBuffer->addSymbol(Cexpr, Cexpr);
1806 AggBuffer->addZeros(AllocSize);
1807 break;
1808 }
1809 llvm_unreachable("unsupported integer const type");
1810 break;
1811
1812 case Type::HalfTyID:
1813 case Type::BFloatTyID:
1814 case Type::FloatTyID:
1815 case Type::DoubleTyID:
1816 AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1817 break;
1818
1819 case Type::PointerTyID: {
1820 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1821 AggBuffer->addSymbol(GVar, GVar);
1822 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1823 const Value *v = Cexpr->stripPointerCasts();
1824 AggBuffer->addSymbol(v, Cexpr);
1825 }
1826 AggBuffer->addZeros(AllocSize);
1827 break;
1828 }
1829
1830 case Type::ArrayTyID:
1832 case Type::StructTyID: {
1834 // bufferAggregateConstant doesn't emit tail-padding, i.e. it writes
1835 // `store_size` bytes, not `alloc_size` bytes. Do it ourselves here.
1836 unsigned StartPos = AggBuffer->getCurpos();
1837 bufferAggregateConstant(CPV, AggBuffer);
1838 unsigned Written = AggBuffer->getCurpos() - StartPos;
1839 unsigned SlotSize = std::max<int>(Bytes, AllocSize);
1840 if (SlotSize > Written)
1841 AggBuffer->addZeros(SlotSize - Written);
1842 } else if (isa<ConstantAggregateZero>(CPV))
1843 AggBuffer->addZeros(Bytes);
1844 else
1845 llvm_unreachable("Unexpected Constant type");
1846 break;
1847 }
1848
1849 default:
1850 llvm_unreachable("unsupported type");
1851 }
1852}
1853
1854void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1855 AggBuffer *aggBuffer) {
1856 const DataLayout &DL = getDataLayout();
1857
1858 auto ExtendBuffer = [](APInt Val, AggBuffer *Buffer) {
1859 unsigned NumBytes = divideCeil(Val.getBitWidth(), 8);
1860 for (unsigned I : llvm::seq(NumBytes)) {
1861 unsigned NumBits = std::min(8u, Val.getBitWidth() - I * 8);
1862 Buffer->addByte(Val.extractBitsAsZExtValue(NumBits, I * 8));
1863 }
1864 };
1865
1866 // Integer or floating point vector splats.
1868 if (auto *VTy = dyn_cast<FixedVectorType>(CPV->getType())) {
1869 for (unsigned I : llvm::seq(VTy->getNumElements()))
1870 bufferLEByte(CPV->getAggregateElement(I), 0, aggBuffer);
1871 return;
1872 }
1873 }
1874
1875 // Integers of arbitrary width
1876 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1877 assert(CI->getType()->isIntegerTy() && "Expected integer constant!");
1878 ExtendBuffer(CI->getValue(), aggBuffer);
1879 return;
1880 }
1881
1882 // f128
1883 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1884 assert(CFP->getType()->isFloatingPointTy() && "Expected fp constant!");
1885 if (CFP->getType()->isFP128Ty()) {
1886 ExtendBuffer(CFP->getValueAPF().bitcastToAPInt(), aggBuffer);
1887 return;
1888 }
1889 }
1890
1891 // Buffer arrays one element at a time.
1892 if (isa<ConstantArray>(CPV)) {
1893 for (const auto &Op : CPV->operands())
1894 bufferLEByte(cast<Constant>(Op), 0, aggBuffer);
1895 return;
1896 }
1897
1898 // Constant vectors
1899 if (const auto *CVec = dyn_cast<ConstantVector>(CPV)) {
1900 bufferAggregateConstVec(CVec, aggBuffer);
1901 return;
1902 }
1903
1904 if (const auto *CDS = dyn_cast<ConstantDataSequential>(CPV)) {
1905 for (unsigned I : llvm::seq(CDS->getNumElements()))
1906 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(I)), 0, aggBuffer);
1907 return;
1908 }
1909
1910 if (isa<ConstantStruct>(CPV)) {
1911 if (CPV->getNumOperands()) {
1912 StructType *ST = cast<StructType>(CPV->getType());
1913 for (unsigned I : llvm::seq(CPV->getNumOperands())) {
1914 int EndOffset = (I + 1 == CPV->getNumOperands())
1915 ? DL.getStructLayout(ST)->getElementOffset(0) +
1916 DL.getTypeAllocSize(ST)
1917 : DL.getStructLayout(ST)->getElementOffset(I + 1);
1918 int Bytes = EndOffset - DL.getStructLayout(ST)->getElementOffset(I);
1919 bufferLEByte(cast<Constant>(CPV->getOperand(I)), Bytes, aggBuffer);
1920 }
1921 }
1922 return;
1923 }
1924 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1925}
1926
1927void NVPTXAsmPrinter::bufferAggregateConstVec(const ConstantVector *CV,
1928 AggBuffer *aggBuffer) {
1929 unsigned NumElems = CV->getType()->getNumElements();
1930 const unsigned BuffSize = aggBuffer->getBufferSize();
1931
1932 // Buffer one element at a time if we have allocated enough buffer space.
1933 if (BuffSize >= NumElems) {
1934 for (const auto &Op : CV->operands())
1935 bufferLEByte(cast<Constant>(Op), 0, aggBuffer);
1936 return;
1937 }
1938
1939 // Sub-byte datatypes will have more elements than bytes allocated for the
1940 // buffer. Merge consecutive elements to form a full byte. We expect that 8 %
1941 // sub-byte-elem-size should be 0 and current expected usage is for i4 (for
1942 // e2m1-fp4 types).
1943 Type *ElemTy = CV->getType()->getElementType();
1944 assert(ElemTy->isIntegerTy() && "Expected integer data type.");
1945 unsigned ElemTySize = ElemTy->getPrimitiveSizeInBits();
1946 assert(ElemTySize < 8 && "Expected sub-byte data type.");
1947 assert(8 % ElemTySize == 0 && "Element type size must evenly divide a byte.");
1948 // Number of elements to merge to form a full byte.
1949 unsigned NumElemsPerByte = 8 / ElemTySize;
1950 unsigned NumCompleteBytes = NumElems / NumElemsPerByte;
1951 unsigned NumTailElems = NumElems % NumElemsPerByte;
1952
1953 // Helper lambda to constant-fold sub-vector of sub-byte type elements into
1954 // i8. Start and end indices of the sub-vector is provided, along with number
1955 // of padding zeros if required.
1956 auto ConvertSubCVtoInt8 = [this, &ElemTy](const ConstantVector *CV,
1957 unsigned Start, unsigned End,
1958 unsigned NumPaddingZeros = 0) {
1959 // Collect elements to create sub-vector.
1960 SmallVector<Constant *, 8> SubCVElems;
1961 for (unsigned I : llvm::seq(Start, End))
1962 SubCVElems.push_back(CV->getAggregateElement(I));
1963
1964 // Optionally pad with zeros.
1965 if (NumPaddingZeros)
1966 SubCVElems.append(NumPaddingZeros, ConstantInt::getNullValue(ElemTy));
1967
1968 auto SubCV = ConstantVector::get(SubCVElems);
1969 Type *Int8Ty = IntegerType::get(SubCV->getContext(), 8);
1970
1971 // Merge elements of the sub-vector using ConstantFolding.
1972 ConstantInt *MergedElem =
1974 ConstantExpr::getBitCast(const_cast<Constant *>(SubCV), Int8Ty),
1975 getDataLayout()));
1976
1977 if (!MergedElem)
1979 "Cannot lower vector global with unusual element type");
1980
1981 return MergedElem;
1982 };
1983
1984 // Iterate through elements of vector one chunk at a time and buffer that
1985 // chunk.
1986 for (unsigned ByteIdx : llvm::seq(NumCompleteBytes))
1987 bufferLEByte(ConvertSubCVtoInt8(CV, ByteIdx * NumElemsPerByte,
1988 (ByteIdx + 1) * NumElemsPerByte),
1989 0, aggBuffer);
1990
1991 // For unevenly sized vectors add tail padding zeros.
1992 if (NumTailElems > 0)
1993 bufferLEByte(ConvertSubCVtoInt8(CV, NumElems - NumTailElems, NumElems,
1994 NumElemsPerByte - NumTailElems),
1995 0, aggBuffer);
1996}
1997
1998/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1999/// a copy from AsmPrinter::lowerConstant, except customized to only handle
2000/// expressions that are representable in PTX and create
2001/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
2002const MCExpr *
2003NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV,
2004 bool ProcessingGeneric) const {
2005 MCContext &Ctx = OutContext;
2006
2007 if (CV->isNullValue() || isa<UndefValue>(CV))
2008 return MCConstantExpr::create(0, Ctx);
2009
2010 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
2011 return MCConstantExpr::create(CI->getZExtValue(), Ctx);
2012
2013 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
2014 const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(getSymbol(GV), Ctx);
2015 if (ProcessingGeneric)
2016 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
2017 return Expr;
2018 }
2019
2020 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
2021 if (!CE) {
2022 llvm_unreachable("Unknown constant value to lower!");
2023 }
2024
2025 switch (CE->getOpcode()) {
2026 default:
2027 break; // Error
2028
2029 case Instruction::AddrSpaceCast: {
2030 // Strip the addrspacecast and pass along the operand
2031 PointerType *DstTy = cast<PointerType>(CE->getType());
2032 if (DstTy->getAddressSpace() == 0)
2033 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
2034
2035 break; // Error
2036 }
2037
2038 case Instruction::GetElementPtr: {
2039 const DataLayout &DL = getDataLayout();
2040
2041 // Generate a symbolic expression for the byte address
2042 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2043 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2044
2045 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2046 ProcessingGeneric);
2047 if (!OffsetAI)
2048 return Base;
2049
2050 int64_t Offset = OffsetAI.getSExtValue();
2052 Ctx);
2053 }
2054
2055 case Instruction::Trunc:
2056 // We emit the value and depend on the assembler to truncate the generated
2057 // expression properly. This is important for differences between
2058 // blockaddress labels. Since the two labels are in the same function, it
2059 // is reasonable to treat their delta as a 32-bit value.
2060 [[fallthrough]];
2061 case Instruction::BitCast:
2062 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2063
2064 case Instruction::IntToPtr: {
2065 const DataLayout &DL = getDataLayout();
2066
2067 // Handle casts to pointers by changing them into casts to the appropriate
2068 // integer type. This promotes constant folding and simplifies this code.
2069 Constant *Op = CE->getOperand(0);
2070 Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2071 /*IsSigned*/ false, DL);
2072 if (Op)
2073 return lowerConstantForGV(Op, ProcessingGeneric);
2074
2075 break; // Error
2076 }
2077
2078 case Instruction::PtrToInt: {
2079 const DataLayout &DL = getDataLayout();
2080
2081 // Support only foldable casts to/from pointers that can be eliminated by
2082 // changing the pointer to the appropriately sized integer type.
2083 Constant *Op = CE->getOperand(0);
2084 Type *Ty = CE->getType();
2085
2086 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2087
2088 // We can emit the pointer value into this slot if the slot is an
2089 // integer slot equal to the size of the pointer.
2090 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2091 return OpExpr;
2092
2093 // Otherwise the pointer is smaller than the resultant integer, mask off
2094 // the high bits so we are sure to get a proper truncation if the input is
2095 // a constant expr.
2096 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2097 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2098 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2099 }
2100
2101 // The MC library also has a right-shift operator, but it isn't consistently
2102 // signed or unsigned between different targets.
2103 case Instruction::Add: {
2104 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2105 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2106 switch (CE->getOpcode()) {
2107 default: llvm_unreachable("Unknown binary operator constant cast expr");
2108 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2109 }
2110 }
2111 }
2112
2113 // If the code isn't optimized, there may be outstanding folding
2114 // opportunities. Attempt to fold the expression using DataLayout as a
2115 // last resort before giving up.
2117 if (C != CE)
2118 return lowerConstantForGV(C, ProcessingGeneric);
2119
2120 // Otherwise report the problem to the user.
2121 std::string S;
2122 raw_string_ostream OS(S);
2123 OS << "Unsupported expression in static initializer: ";
2124 CE->printAsOperand(OS, /*PrintType=*/false,
2125 !MF ? nullptr : MF->getFunction().getParent());
2126 report_fatal_error(Twine(OS.str()));
2127}
2128
2129void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) const {
2130 OutContext.getAsmInfo().printExpr(OS, Expr);
2131}
2132
2133/// PrintAsmOperand - Print out an operand for an inline asm expression.
2134///
2135bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2136 const char *ExtraCode, raw_ostream &O) {
2137 if (ExtraCode && ExtraCode[0]) {
2138 if (ExtraCode[1] != 0)
2139 return true; // Unknown modifier.
2140
2141 switch (ExtraCode[0]) {
2142 default:
2143 // See if this is a generic print operand
2144 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2145 case 'r':
2146 break;
2147 }
2148 }
2149
2150 printOperand(MI, OpNo, O);
2151
2152 return false;
2153}
2154
2155bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2156 unsigned OpNo,
2157 const char *ExtraCode,
2158 raw_ostream &O) {
2159 if (ExtraCode && ExtraCode[0])
2160 return true; // Unknown modifier
2161
2162 O << '[';
2163 printMemOperand(MI, OpNo, O);
2164 O << ']';
2165
2166 return false;
2167}
2168
// Prints operand OpNum of MI to O, choosing the output form from the
// operand's MachineOperand type. Operand types without a handler reach
// llvm_unreachable.
//
// NOTE(review): the embedded listing numbers jump over 2173, 2176, 2178, 2184,
// 2188, 2192 and 2196, so every `case` label and both physical-register print
// statements are absent from this copy. Restore them from the upstream file
// before building; do not infer them from this text.
2169void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
2170 raw_ostream &O) {
2171 const MachineOperand &MO = MI->getOperand(OpNum);
2172 switch (MO.getType()) {
// Register handling: physical registers are split into NVPTX::VRDepot versus
// everything else (the statements for both arms are missing here); virtual
// registers are printed through emitVirtualRegister.
2174 if (MO.getReg().isPhysical()) {
2175 if (MO.getReg() == NVPTX::VRDepot)
2177 else
2179 } else {
2180 emitVirtualRegister(MO.getReg(), O);
2181 }
2182 break;
2183
// Integer immediate: streamed directly.
2185 O << MO.getImm();
2186 break;
2187
// Floating-point immediate: formatted by printFPConstant.
2189 printFPConstant(MO.getFPImm(), O);
2190 break;
2191
// Symbol-like operand: delegated to PrintSymbolOperand.
2193 PrintSymbolOperand(MO, O);
2194 break;
2195
// Basic-block operand: prints the block's MCSymbol.
2197 MO.getMBB()->getSymbol()->print(O, MAI);
2198 break;
2199
2200 default:
2201 llvm_unreachable("Operand type not supported.");
2202 }
2203}
2204
2205void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum,
2206 raw_ostream &O, const char *Modifier) {
2207 printOperand(MI, OpNum, O);
2208
2209 if (Modifier && strcmp(Modifier, "add") == 0) {
2210 O << ", ";
2211 printOperand(MI, OpNum + 1, O);
2212 } else {
2213 if (MI->getOperand(OpNum + 1).isImm() &&
2214 MI->getOperand(OpNum + 1).getImm() == 0)
2215 return; // don't print ',0' or '+0'
2216 O << "+";
2217 printOperand(MI, OpNum + 1, O);
2218 }
2219}
2220
2221/// Returns true if \p Line begins with an alphabetic character or underscore,
2222/// indicating it is a PTX instruction that should receive a .loc directive.
2223static bool isPTXInstruction(StringRef Line) {
2224 StringRef Trimmed = Line.ltrim();
2225 return !Trimmed.empty() &&
2226 (std::isalpha(static_cast<unsigned char>(Trimmed[0])) ||
2227 Trimmed[0] == '_');
2228}
2229
2230/// Returns the DILocation for an inline asm MachineInstr if debug line info
2231/// should be emitted, or nullptr otherwise.
2233 if (!MI || !MI->getDebugLoc())
2234 return nullptr;
2235 const DISubprogram *SP = MI->getMF()->getFunction().getSubprogram();
2236 if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
2237 return nullptr;
2238 const DILocation *DL = MI->getDebugLoc();
2239 if (!DL->getFile() || !DL->getLine())
2240 return nullptr;
2241 return DL;
2242}
2243
2244namespace {
2245struct InlineAsmInliningContext {
2246 MCSymbol *FuncNameSym = nullptr;
2247 unsigned FileIA = 0;
2248 unsigned LineIA = 0;
2249 unsigned ColIA = 0;
2250
2251 bool hasInlinedAt() const { return FuncNameSym != nullptr; }
2252};
2253} // namespace
2254
2255/// Resolves the enhanced-lineinfo inlining context for an inline asm debug
2256/// location. Returns a default (empty) context if inlining info is unavailable.
2257static InlineAsmInliningContext
2259 NVPTXDwarfDebug *NVDD, MCStreamer &Streamer,
2260 unsigned CUID) {
2261 InlineAsmInliningContext Ctx;
2262 const DILocation *InlinedAt = DL->getInlinedAt();
2263 if (!InlinedAt || !InlinedAt->getFile() || !NVDD ||
2264 !NVDD->isEnhancedLineinfo(MF))
2265 return Ctx;
2266 const auto *SubProg = getDISubprogram(DL->getScope());
2267 if (!SubProg)
2268 return Ctx;
2269 Ctx.FuncNameSym = NVDD->getOrCreateFuncNameSymbol(SubProg->getLinkageName());
2270 Ctx.FileIA = Streamer.emitDwarfFileDirective(
2271 0, InlinedAt->getFile()->getDirectory(),
2272 InlinedAt->getFile()->getFilename(), std::nullopt, std::nullopt, CUID);
2273 Ctx.LineIA = InlinedAt->getLine();
2274 Ctx.ColIA = InlinedAt->getColumn();
2275 return Ctx;
2276}
2277
// Emits the inline asm text Str as raw text on the output streamer.
//
// Without a usable debug location (getInlineAsmDebugLoc returns null) the
// whole string is emitted in one piece. With one, the string is split on '\n'
// and emitted line by line; each line that isPTXInstruction accepts is
// preceded by a DWARF .loc directive.
//
// MCOptions, LocMDNode and Dialect are not referenced by any line visible in
// this copy.
//
// NOTE(review): the embedded listing numbers skip 2288 (inside the emitAsmStr
// lambda) and 2313 (just before the per-line loop). Whatever statements sat
// there are absent from this copy; restore them from the upstream file.
2278void NVPTXAsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
2279 const MCTargetOptions &MCOptions,
2280 const MDNode *LocMDNode,
2281 InlineAsm::AsmDialect Dialect,
2282 const MachineInstr *MI) {
2283 assert(!Str.empty() && "Can't emit empty inline asm block");
// Drop a single trailing NUL byte, if there is one.
2284 if (Str.back() == 0)
2285 Str = Str.substr(0, Str.size() - 1);
2286
// Helper for the no-debug-info path: raw text followed by the end hook.
2287 auto emitAsmStr = [&](StringRef AsmStr) {
2289 OutStreamer->emitRawText(AsmStr);
2290 emitInlineAsmEnd(STI, nullptr, MI);
2291 };
2292
2293 const DILocation *DL = getInlineAsmDebugLoc(MI);
2294 if (!DL) {
2295 emitAsmStr(Str);
2296 return;
2297 }
2298
// Register the source file of the location and remember its file number.
2299 const DIFile *File = DL->getFile();
2300 unsigned Line = DL->getLine();
2301 const unsigned Column = DL->getColumn();
2302 const unsigned CUID = OutStreamer->getContext().getDwarfCompileUnitID();
2303 const unsigned FileNumber = OutStreamer->emitDwarfFileDirective(
2304 0, File->getDirectory(), File->getFilename(), std::nullopt, std::nullopt,
2305 CUID);
2306
// MI is non-null here: getInlineAsmDebugLoc returns null for a null MI.
2307 auto *NVDD = static_cast<NVPTXDwarfDebug *>(getDwarfDebug());
2308 InlineAsmInliningContext InlineCtx =
2309 getInlineAsmInliningContext(DL, *MI->getMF(), NVDD, *OutStreamer, CUID);
2310
2311 SmallVector<StringRef, 16> Lines;
2312 Str.split(Lines, '\n');
2314 for (const StringRef &L : Lines) {
// Strip a trailing carriage return so CRLF input is emitted without the '\r'.
2315 StringRef RTrimmed = L.rtrim('\r');
2316 if (isPTXInstruction(L)) {
2317 if (InlineCtx.hasInlinedAt()) {
2318 OutStreamer->emitDwarfLocDirectiveWithInlinedAt(
2319 FileNumber, Line, Column, InlineCtx.FileIA, InlineCtx.LineIA,
2320 InlineCtx.ColIA, InlineCtx.FuncNameSym, DWARF2_FLAG_IS_STMT, 0, 0,
2321 File->getFilename());
2322 } else {
2323 OutStreamer->emitDwarfLocDirective(FileNumber, Line, Column,
2324 DWARF2_FLAG_IS_STMT, 0, 0,
2325 File->getFilename());
2326 }
2327 }
2328 OutStreamer->emitRawText(RTrimmed);
// The line number advances for every asm line, whether or not a .loc was
// emitted for it.
2329 ++Line;
2330 }
2331 emitInlineAsmEnd(STI, nullptr, MI);
2332}
2333
// Definition of the static member NVPTXAsmPrinter::ID, initialized to 0.
2334char NVPTXAsmPrinter::ID = 0;
2335
// INITIALIZE_PASS arguments, in order: pass class, command-line argument
// string, descriptive name, and the cfg and analysis flags (both false).
2336INITIALIZE_PASS(NVPTXAsmPrinter, "nvptx-asm-printer", "NVPTX Assembly Printer",
2337 false, false)
2338
2339// Force static initialization.
// NOTE(review): the embedded listing numbers skip 2342-2343, so the body of
// this function is absent from this copy; restore it from the upstream file.
2340extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2341LLVMInitializeNVPTXAsmPrinter() {
2344}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition Compiler.h:215
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define DWARF2_FLAG_IS_STMT
Definition MCDwarf.h:119
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static StringRef getTextureName(const Value &V)
static const DILocation * getInlineAsmDebugLoc(const MachineInstr *MI)
Returns the DILocation for an inline asm MachineInstr if debug line info should be emitted,...
#define DEPOTNAME
static void discoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
discoverDependentGlobals - Return a set of GlobalVariables on which V depends.
static bool hasFullDebugInfo(Module &M)
static StringRef getSurfaceName(const Value &V)
static bool canDemoteGlobalVar(const GlobalVariable *GV, Function const *&f)
static StringRef getSamplerName(const Value &V)
static bool useFuncSeen(const Constant *C, const SmallPtrSetImpl< const Function * > &SeenSet)
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
static bool usedInGlobalVarDef(const Constant *C)
static InlineAsmInliningContext getInlineAsmInliningContext(const DILocation *DL, const MachineFunction &MF, NVPTXDwarfDebug *NVDD, MCStreamer &Streamer, unsigned CUID)
Resolves the enhanced-lineinfo inlining context for an inline asm debug location.
static bool isPTXInstruction(StringRef Line)
Returns true if Line begins with an alphabetic character or underscore, indicating it is a PTX instru...
static bool usedInOneFunc(const User *U, Function const *&OneFunc)
static void emitInitialRawDwarfLocDirective(const MachineFunction &MF, DwarfDebug *DD, MCStreamer &OutStreamer)
Emits initial debug location directive.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file contains some templates that are useful if you are working with the STL at all.
static const char * name
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallString class.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
@ __CLK_ADDRESS_BASE
@ __CLK_FILTER_BASE
@ __CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_MASK
@ __CLK_ADDRESS_MASK
@ __CLK_FILTER_MASK
static const fltSemantics & IEEEsingle()
Definition APFloat.h:297
static const fltSemantics & IEEEdouble()
Definition APFloat.h:298
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5901
APInt bitcastToAPInt() const
Definition APFloat.h:1436
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition APInt.cpp:521
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
MCSymbol * getSymbol(const GlobalValue *GV) const
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
DwarfDebug * getDwarfDebug()
Definition AsmPrinter.h:290
virtual void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, const MCSubtargetInfo *EndInfo, const MachineInstr *MI)
Let the target do anything it needs to do after emitting inlineasm.
TargetMachine & TM
Target machine description.
Definition AsmPrinter.h:94
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
MachineFunction * MF
The current machine function.
Definition AsmPrinter.h:109
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition AsmPrinter.h:515
virtual void emitFunctionBodyStart()
Targets can override this to emit stuff before the first basic block in the function.
Definition AsmPrinter.h:622
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition AsmPrinter.h:128
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition AsmPrinter.h:101
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition AsmPrinter.h:453
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition AsmPrinter.h:106
const MCAsmInfo & MAI
Target Asm Printer information.
Definition AsmPrinter.h:97
virtual void emitFunctionBodyEnd()
Targets can override this to emit stuff after the last basic block in the function.
Definition AsmPrinter.h:626
const DataLayout & getDataLayout() const
Return information about data layout.
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
MCSymbol * GetExternalSymbolSymbol(const Twine &Sym) const
Return the MCSymbol for the specified ExternalSymbol.
const MCSubtargetInfo & getSubtargetInfo() const
Return information about subtarget.
virtual void emitInlineAsmStart() const
Let the target do anything it needs to do before emitting inlineasm.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
MaybeAlign getParamStackAlign(unsigned ArgNo) const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
MaybeAlign getParamAlign(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
Type * getParamByValType(unsigned ArgNo) const
Extract the byval type for a call or parameter.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
unsigned arg_size() const
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
const APFloat & getValueAPF() const
Definition Constants.h:463
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
FixedVectorType * getType() const
Specialize the getType() method to always return a FixedVectorType, which reduces the amount of casti...
Definition Constants.h:697
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constant.h:64
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Subprogram description. Uses SubclassData1.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT, true > const_iterator
Definition DenseMap.h:136
iterator end()
Definition DenseMap.h:143
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
Collects and handles dwarf debug information.
Definition DwarfDebug.h:352
const MachineInstr * emitInitialLocDirective(const MachineFunction &MF, unsigned CUID)
Emits initial debug location directive.
unsigned getNumElements() const
Type * getReturnType() const
DISubprogram * getSubprogram() const
Get the attached subprogram.
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:668
StringRef getSection() const
Get the custom section of this global if it has one.
bool hasSection() const
Check if this global has a custom object file section.
bool hasLinkOnceLinkage() const
bool hasExternalLinkage() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:346
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
unsigned getAddressSpace() const
PointerType * getType() const
Global values are always pointers.
bool hasWeakLinkage() const
bool hasCommonLinkage() const
bool hasAvailableExternallyLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:348
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isLoopHeader(const BlockT *BB) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:342
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:347
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
void addOperand(const MCOperand Op)
Definition MCInst.h:215
void setOpcode(unsigned Op)
Definition MCInst.h:201
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
Streaming machine code generation interface.
Definition MCStreamer.h:222
virtual bool hasRawTextSupport() const
Return true if this asm streamer supports emitting unformatted text to the .s file with EmitRawText.
Definition MCStreamer.h:385
unsigned emitDwarfFileDirective(unsigned FileNo, StringRef Directory, StringRef Filename, std::optional< MD5::MD5Result > Checksum=std::nullopt, std::optional< StringRef > Source=std::nullopt, unsigned CUID=0)
Associate a filename with a specified logical file number.
Definition MCStreamer.h:889
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:213
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
LLVM_ABI void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition MCSymbol.cpp:59
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
iterator_range< pred_iterator > predecessors()
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_MachineBasicBlock
MachineBasicBlock reference.
@ MO_Register
Register operand.
@ MO_ExternalSymbol
Name of external global symbol.
@ MO_FPImmediate
Floating-point immediate operand.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
DwarfDebug * createDwarfDebug() override
Create NVPTX-specific DwarfDebug handler.
std::string getVirtualRegisterName(unsigned) const
bool doFinalization(Module &M) override
Shut down the asmprinter.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
NVPTX-specific DwarfDebug implementation.
bool isEnhancedLineinfo(const MachineFunction &MF) const
Returns true if the enhanced lineinfo mode (with inlined_at) is active for the given MachineFunction.
MCSymbol * getOrCreateFuncNameSymbol(StringRef LinkageName)
Get or create an MCSymbol in .debug_str for a function's linkage name.
static const NVPTXFloatMCExpr * createConstantBFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:44
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:49
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:54
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition NVPTXMCExpr.h:59
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
static const char * getRegisterName(MCRegister Reg)
bool checkImageHandleSymbol(StringRef Symbol) const
Check if the symbol has a mapping.
const char * getName(unsigned RegNo) const
std::string getTargetName() const
unsigned getMaxRequiredAlignment() const
bool hasMaskOperator() const
const NVPTXTargetLowering * getTargetLowering() const override
unsigned getPTXVersion() const
const NVPTXRegisterInfo * getRegisterInfo() const override
unsigned int getSmVersion() const
NVPTX::DrvInterface getDrvInterface() const
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Implements NVPTX-specific streamer.
void outputDwarfFileDirectives()
Outputs the list of the DWARF '.file' directives to the streamer.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:72
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
iterator begin() const
Definition StringRef.h:114
StringRef ltrim(char Char) const
Return string with consecutive Char characters starting from the left removed.
Definition StringRef.h:826
iterator end() const
Definition StringRef.h:116
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:180
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
@ ArrayTyID
Arrays.
Definition Type.h:76
@ HalfTyID
16-bit floating point type
Definition Type.h:57
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ StructTyID
Structures.
Definition Type.h:75
@ IntegerTyID
Arbitrary bit width integers.
Definition Type.h:71
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition Type.h:77
@ BFloatTyID
16-bit floating point type (7-bit significand)
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ PointerTyID
Pointers.
Definition Type.h:74
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:270
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
TypeID getTypeID() const
Return the type id for the type.
Definition Type.h:138
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
op_range operands()
Definition User.h:267
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
size_type size() const
Definition DenseSet.h:87
bool erase(const ValueT &V)
Definition DenseSet.h:100
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:190
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
constexpr StringLiteral BlocksAreClusters("nvvm.blocksareclusters")
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:50
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
uint64_t read64le(const void *P)
Definition Endian.h:435
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:573
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
Definition STLExtras.h:2180
bool isManaged(const Value &V)
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC)
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
std::optional< unsigned > getMaxNReg(const Function &F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
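Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ..., are the values from the original input ranges.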
Definition STLExtras.h:2554
void interleave(ForwardIterator begin, ForwardIterator end, UnaryFunctor each_fn, NullaryFunctor between_fn)
An STL-style algorithm similar to std::for_each that applies a second functor between every pair of elements.
Definition STLExtras.h:2275
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::string utostr(uint64_t X, bool isNeg=false)
std::optional< unsigned > getMinCTASm(const Function &F)
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like all_of.
Definition STLExtras.h:2173
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
SmallVector< unsigned, 3 > getReqNTID(const Function &F)
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
unsigned promoteScalarArgumentSize(unsigned size)
void clearAnnotationCache(const Module *Mod)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool shouldPassAsArray(Type *Ty)
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC)
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and returns a new filter_iterator range.
Definition STLExtras.h:551
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition Format.h:169
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
std::optional< unsigned > getMaxClusterRank(const Function &F)
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
SmallVector< unsigned, 3 > getMaxNTID(const Function &F)
LLVM_ABI void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
DWARFExpression::Operation Op
Align getPTXParamAlign(const Function *F, Type *Ty, unsigned AttrIdx, const DataLayout &DL)
Get the alignment for a function parameter or return value.
ArrayRef(const T &OneElt) -> ArrayRef< T >
Align getDeviceByValParamAlign(const Function *F, Type *ArgTy, Align InitialAlign, const DataLayout &DL)
Target & getTheNVPTXTarget64()
bool isKernelFunction(const Function &F)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
bool hasBlocksAreClusters(const Function &F)
SmallVector< unsigned, 3 > getClusterDim(const Function &F)
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrower than C's type.
PTXOpaqueType getPTXOpaqueType(const GlobalVariable &GV)
LLVM_ABI MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (for example, "llvm.loop.unroll.count").
LLVM_ABI DISubprogram * getDISubprogram(const MDNode *Scope)
Find subprogram that is enclosing this scope.
Target & getTheNVPTXTarget32()
#define N
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
RegisterAsmPrinter - Helper template for registering a target specific assembly printer, for use in the target machine initialization function.