LLVM 22.0.0git
MicrosoftDemangle.cpp
Go to the documentation of this file.
1//===- MicrosoftDemangle.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a demangler for MSVC-style mangled symbols.
10//
11// This file has no dependencies on the rest of LLVM so that it can be
12// easily reused in other programs such as libcxxabi.
13//
14//===----------------------------------------------------------------------===//
15
17
23
24#include <array>
25#include <cctype>
26#include <cstdio>
27#include <optional>
28#include <string_view>
29#include <tuple>
30
31using namespace llvm;
32using namespace ms_demangle;
33
// Returns true when S is non-empty and its first character is one of the
// ASCII decimal digits '0'..'9'.
//
// The range is compared directly instead of going through std::isdigit:
// handing std::isdigit a plain char with a negative value (any byte >= 0x80
// on targets where char is signed) is undefined behavior, and its answer may
// additionally vary with the active C locale. Mangled names are untrusted
// input, so such bytes can reach this function.
static bool startsWithDigit(std::string_view S) {
  if (S.empty())
    return false;
  const char First = S.front();
  return First >= '0' && First <= '9';
}
37
38struct NodeList {
39 Node *N = nullptr;
40 NodeList *Next = nullptr;
41};
42
43static bool consumeFront(std::string_view &S, char C) {
44 if (!llvm::itanium_demangle::starts_with(S, C))
45 return false;
46 S.remove_prefix(1);
47 return true;
48}
49
50static bool consumeFront(std::string_view &S, std::string_view C) {
51 if (!llvm::itanium_demangle::starts_with(S, C))
52 return false;
53 S.remove_prefix(C.size());
54 return true;
55}
56
57static bool consumeFront(std::string_view &S, std::string_view PrefixA,
58 std::string_view PrefixB, bool A) {
59 const std::string_view &Prefix = A ? PrefixA : PrefixB;
60 return consumeFront(S, Prefix);
61}
62
63static bool startsWith(std::string_view S, std::string_view PrefixA,
64 std::string_view PrefixB, bool A) {
65 const std::string_view &Prefix = A ? PrefixA : PrefixB;
66 return llvm::itanium_demangle::starts_with(S, Prefix);
67}
68
// Classifies the pointer-like type encoding at the start of MangledName:
// returns true when it denotes a pointer to member (data or function) and
// false otherwise.
//
// MangledName is taken by value, so nothing stripped here while peeking is
// consumed from the caller's view. Error is reset to false on entry and set to
// true when the characters following the pointer prefix match none of the
// encodings recognized below. front() is read unconditionally, so the view
// must not be empty on entry.
69bool Demangler::isMemberPointer(std::string_view MangledName, bool &Error) {
70 Error = false;
71 const char F = MangledName.front();
72 MangledName.remove_prefix(1);
73 switch (F) {
74 case '$':
75 // This is probably an rvalue reference (e.g. $$Q), and you cannot have an
76 // rvalue reference to a member.
77 return false;
78 case 'A':
79 // 'A' indicates a reference, and you cannot have a reference to a member
80 // function or member.
81 return false;
82 case 'P':
83 case 'Q':
84 case 'R':
85 case 'S':
86 // These 4 values indicate some kind of pointer, but we still don't know
87 // what.
88 break;
89 default:
90 // isMemberPointer() is called only if isPointerType() returns true,
91 // and it rejects other prefixes.
// NOTE(review): no statement follows the comment above in this listing (the
// embedded numbering skips a line); confirm the default case against the
// upstream file.
93 }
94
95 // If it starts with a number, then 6 indicates a non-member function
96 // pointer, and 8 indicates a member function pointer.
97 if (startsWithDigit(MangledName)) {
98 if (MangledName[0] != '6' && MangledName[0] != '8') {
99 Error = true;
100 return false;
101 }
102 return (MangledName[0] == '8');
103 }
104
105 // Remove ext qualifiers since those can appear on either type and are
106 // therefore not indicative.
// Each consumeFront result is deliberately ignored: every qualifier is
// optional. The value returned by demanglePointerAuthQualifier is unused too.
107 consumeFront(MangledName, 'E'); // 64-bit
108 consumeFront(MangledName, 'I'); // restrict
109 consumeFront(MangledName, 'F'); // unaligned
110 demanglePointerAuthQualifier(MangledName);
111
// Nothing left to classify after the optional qualifiers: malformed input.
112 if (MangledName.empty()) {
113 Error = true;
114 return false;
115 }
116
117 // The next value should be either ABCD (non-member) or QRST (member).
118 switch (MangledName.front()) {
119 case 'A':
120 case 'B':
121 case 'C':
122 case 'D':
123 return false;
124 case 'Q':
125 case 'R':
126 case 'S':
127 case 'T':
128 return true;
129 default:
130 Error = true;
131 return false;
132 }
133}
134
// Tests MangledName against each special-intrinsic prefix in the order listed
// below ("?_7", "?_8", ... "?__J"); consumeFront strips the first prefix that
// matches. The four "?_R" codes carry an extra digit (0-4).
// NOTE(review): the line holding the return type and the statement belonging
// to each `if` are not present in this listing (the embedded numbering skips
// them); restore them from the upstream file.
136consumeSpecialIntrinsicKind(std::string_view &MangledName) {
137 if (consumeFront(MangledName, "?_7"))
139 if (consumeFront(MangledName, "?_8"))
141 if (consumeFront(MangledName, "?_9"))
143 if (consumeFront(MangledName, "?_A"))
145 if (consumeFront(MangledName, "?_B"))
147 if (consumeFront(MangledName, "?_C"))
149 if (consumeFront(MangledName, "?_P"))
151 if (consumeFront(MangledName, "?_R0"))
153 if (consumeFront(MangledName, "?_R1"))
155 if (consumeFront(MangledName, "?_R2"))
157 if (consumeFront(MangledName, "?_R3"))
159 if (consumeFront(MangledName, "?_R4"))
161 if (consumeFront(MangledName, "?_S"))
163 if (consumeFront(MangledName, "?__E"))
165 if (consumeFront(MangledName, "?__F"))
167 if (consumeFront(MangledName, "?__J"))
170}
171
172static bool startsWithLocalScopePattern(std::string_view S) {
173 if (!consumeFront(S, '?'))
174 return false;
175
176 size_t End = S.find('?');
177 if (End == std::string_view::npos)
178 return false;
179 std::string_view Candidate = S.substr(0, End);
180 if (Candidate.empty())
181 return false;
182
183 // \?[0-9]\?
184 // ?@? is the discriminator 0.
185 if (Candidate.size() == 1)
186 return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9');
187
188 // If it's not 0-9, then it's an encoded number terminated with an @
189 if (Candidate.back() != '@')
190 return false;
191 Candidate.remove_suffix(1);
192
193 // An encoded number starts with B-P and all subsequent digits are in A-P.
194 // Note that the reason the first digit cannot be A is two fold. First, it
195 // would create an ambiguity with ?A which delimits the beginning of an
196 // anonymous namespace. Second, A represents 0, and you don't start a multi
197 // digit number with a leading 0. Presumably the anonymous namespace
198 // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J.
199 if (Candidate[0] < 'B' || Candidate[0] > 'P')
200 return false;
201 Candidate.remove_prefix(1);
202 while (!Candidate.empty()) {
203 if (Candidate[0] < 'A' || Candidate[0] > 'P')
204 return false;
205 Candidate.remove_prefix(1);
206 }
207
208 return true;
209}
210
// Returns true when S begins with one of the tag-type codes: 'T' (union),
// 'U' (struct), 'V' (class) or 'W' (enum).
//
// An empty view is answered with false instead of being passed to front(),
// which has undefined behavior on an empty std::string_view.
static bool isTagType(std::string_view S) {
  if (S.empty())
    return false;
  switch (S.front()) {
  case 'T': // union
  case 'U': // struct
  case 'V': // class
  case 'W': // enum
    return true;
  default:
    return false;
  }
}
221
// Returns true when S begins with '?', the marker tested here for a custom
// type. An empty view yields false rather than indexing S[0], which is
// undefined behavior on an empty std::string_view.
static bool isCustomType(std::string_view S) {
  return !S.empty() && S.front() == '?';
}
223
224static bool isPointerType(std::string_view S) {
225 if (llvm::itanium_demangle::starts_with(S, "$$Q")) // foo &&
226 return true;
227
228 switch (S.front()) {
229 case 'A': // foo &
230 case 'P': // foo *
231 case 'Q': // foo *const
232 case 'R': // foo *volatile
233 case 'S': // foo *const volatile
234 return true;
235 }
236 return false;
237}
238
// Returns true when S begins with 'Y', the code tested here for an array
// type. An empty view yields false rather than indexing S[0], which is
// undefined behavior on an empty std::string_view.
static bool isArrayType(std::string_view S) {
  return !S.empty() && S.front() == 'Y';
}
240
241static bool isFunctionType(std::string_view S) {
242 return llvm::itanium_demangle::starts_with(S, "$$A8@@") ||
243 llvm::itanium_demangle::starts_with(S, "$$A6");
244}
245
// Consumes an optional function ref-qualifier code from the front of
// MangledName: 'G' is tried first and 'H' only if 'G' did not match.
// NOTE(review): the return type line and the statements selected by each
// branch are not present in this listing (the embedded numbering skips them);
// restore them from the upstream file.
247demangleFunctionRefQualifier(std::string_view &MangledName) {
248 if (consumeFront(MangledName, 'G'))
250 else if (consumeFront(MangledName, 'H'))
253}
254
// Consumes the pointer/reference code at the front of MangledName and returns
// the cv-qualifiers applied to the pointer itself together with its affinity
// (pointer, lvalue reference or rvalue reference).
//
// "$$Q" is checked first because it is three characters long; every other code
// is a single character. front() is read without an emptiness check, so the
// caller must pass a non-empty view.
255static std::pair<Qualifiers, PointerAffinity>
256demanglePointerCVQualifiers(std::string_view &MangledName) {
257 if (consumeFront(MangledName, "$$Q"))
258 return std::make_pair(Q_None, PointerAffinity::RValueReference);
259
260 const char F = MangledName.front();
261 MangledName.remove_prefix(1);
262 switch (F) {
263 case 'A':
264 return std::make_pair(Q_None, PointerAffinity::Reference);
265 case 'P':
266 return std::make_pair(Q_None, PointerAffinity::Pointer);
267 case 'Q':
268 return std::make_pair(Q_Const, PointerAffinity::Pointer);
269 case 'R':
270 return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
271 case 'S':
272 return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
// NOTE(review): the continuation of the call above and the statement after the
// closing comment below are not present in this listing; restore them from the
// upstream file.
274 }
275 // This function is only called if isPointerType() returns true,
276 // and it only returns true for the six cases listed above.
278}
279
// Flattens a linked list into an array: allocates Count slots from Arena,
// copies the node pointer of each list cell into them in list order, records
// the array and Count in N, and returns N.
//
// Head is advanced Count times with no null check, so the list must contain
// at least Count cells.
// NOTE(review): the first line of the signature and the statement that
// creates N are not present in this listing; restore them from the upstream
// file.
281 size_t Count) {
283 N->Count = Count;
284 N->Nodes = Arena.allocArray<Node *>(Count);
285 for (size_t I = 0; I < Count; ++I) {
286 N->Nodes[I] = Head->N;
287 Head = Head->Next;
288 }
289 return N;
290}
291
292std::string_view Demangler::copyString(std::string_view Borrowed) {
293 char *Stable = Arena.allocUnalignedBuffer(Borrowed.size());
294 // This is not a micro-optimization, it avoids UB, should Borrowed be an null
295 // buffer.
296 if (Borrowed.size())
297 std::memcpy(Stable, Borrowed.data(), Borrowed.size());
298
299 return {Stable, Borrowed.size()};
300}
301
// Builds a SpecialTableSymbolNode for one of the table-like special symbols
// whose display names are set below (`vftable', `vbtable', `local vftable',
// `RTTI Complete Object Locator').
//
// After the qualified name, the encoding must continue with '6' or '7',
// followed by qualifiers and then zero or more fully qualified type names
// terminated by '@'. Sets Error and returns nullptr on malformed input.
// NOTE(review): the return type, the second parameter (K), the case labels of
// the switch and the body of its default case are not present in this
// listing; restore them from the upstream file.
303Demangler::demangleSpecialTableSymbolNode(std::string_view &MangledName,
305 NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>();
306 switch (K) {
308 NI->Name = "`vftable'";
309 break;
311 NI->Name = "`vbtable'";
312 break;
314 NI->Name = "`local vftable'";
315 break;
317 NI->Name = "`RTTI Complete Object Locator'";
318 break;
319 default:
321 }
322 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
323 SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>();
324 STSN->Name = QN;
325 bool IsMember = false;
326 if (MangledName.empty()) {
327 Error = true;
328 return nullptr;
329 }
// Only '6' and '7' are accepted as the character that follows the name.
330 char Front = MangledName.front();
331 MangledName.remove_prefix(1);
332 if (Front != '6' && Front != '7') {
333 Error = true;
334 return nullptr;
335 }
336
// IsMember is required by std::tie but is not used afterwards.
337 std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName);
338
// Collect the target names into a linked list first, since their number is
// only known once the terminating '@' has been reached.
339 NodeList *TargetCurrent = nullptr;
340 NodeList *TargetHead = nullptr;
341 size_t Count = 0;
342 while (!consumeFront(MangledName, '@')) {
343 ++Count;
344
345 NodeList *Next = Arena.alloc<NodeList>();
346 if (TargetCurrent)
347 TargetCurrent->Next = Next;
348 else
349 TargetHead = Next;
350
351 TargetCurrent = Next;
352 QualifiedNameNode *QN = demangleFullyQualifiedTypeName(MangledName);
353 if (Error)
354 return nullptr;
355 assert(QN);
356 TargetCurrent->N = QN;
357 }
358
// TargetNames is left at its default when no target name was present.
359 if (Count > 0)
360 STSN->TargetNames = nodeListToNodeArray(Arena, TargetHead, Count);
361
362 return STSN;
363}
364
// Builds the variable node for a local static guard. IsThread is stored on the
// guard identifier unchanged.
//
// After the scope chain, the encoding must continue with "4IA" (IsVisible is
// set to false) or "5" (IsVisible is set to true); anything else sets Error
// and returns nullptr. If input remains after that, it is parsed as an
// unsigned number and stored as the guard's ScopeIndex.
// NOTE(review): the line holding the return type is not present in this
// listing.
366Demangler::demangleLocalStaticGuard(std::string_view &MangledName,
367 bool IsThread) {
368 LocalStaticGuardIdentifierNode *LSGI =
369 Arena.alloc<LocalStaticGuardIdentifierNode>();
370 LSGI->IsThread = IsThread;
371 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
372 LocalStaticGuardVariableNode *LSGVN =
373 Arena.alloc<LocalStaticGuardVariableNode>();
374 LSGVN->Name = QN;
375
376 if (consumeFront(MangledName, "4IA"))
377 LSGVN->IsVisible = false;
378 else if (consumeFront(MangledName, "5"))
379 LSGVN->IsVisible = true;
380 else {
381 Error = true;
382 return nullptr;
383 }
384
385 if (!MangledName.empty())
386 LSGI->ScopeIndex = demangleUnsigned(MangledName);
387 return LSGVN;
388}
389
// Returns an identifier node Id whose Name is set to the given string. Name is
// stored as a view, not copied.
// NOTE(review): the first signature line and the statement that creates Id are
// not present in this listing; restore them from the upstream file.
391 std::string_view Name) {
393 Id->Name = Name;
394 return Id;
395}
396
// Wraps a single identifier in a qualified name: the returned node QN gets a
// one-element component array, allocated from Arena, whose only entry is
// Identifier.
// NOTE(review): the first signature line and the statement that creates QN are
// not present in this listing; restore them from the upstream file.
398 IdentifierNode *Identifier) {
400 QN->Components = Arena.alloc<NodeArrayNode>();
401 QN->Components->Count = 1;
402 QN->Components->Nodes = Arena.allocArray<Node *>(1);
403 QN->Components->Nodes[0] = Identifier;
404 return QN;
405}
406
// String overload: forwards to the identifier overload of
// synthesizeQualifiedName with the node Id.
// NOTE(review): the first signature line and the statement that creates Id
// from Name are not present in this listing; restore them from the upstream
// file.
408 std::string_view Name) {
410 return synthesizeQualifiedName(Arena, Id);
411}
412
// Returns a variable symbol VSN of the given Type whose qualified name consists
// of the single component VariableName.
// NOTE(review): the first signature line and the statement that creates VSN
// are not present in this listing; restore them from the upstream file.
414 TypeNode *Type,
415 std::string_view VariableName) {
417 VSN->Type = Type;
418 VSN->Name = synthesizeQualifiedName(Arena, VariableName);
419 return VSN;
420}
421
// Builds a variable symbol that has a name but no type: VariableName becomes
// the innermost name component and the scope chain parsed from MangledName
// qualifies it. The encoding must then continue with '8'; otherwise Error is
// set and nullptr is returned.
// NOTE(review): the line holding the return type is not present in this
// listing.
423Demangler::demangleUntypedVariable(ArenaAllocator &Arena,
424 std::string_view &MangledName,
425 std::string_view VariableName) {
426 NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName);
427 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
428 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
429 VSN->Name = QN;
430 if (consumeFront(MangledName, "8"))
431 return VSN;
432
433 Error = true;
434 return nullptr;
435}
436
// Parses the four numeric fields of an RTTI base class descriptor, in the
// order NVOffset, VBPtrOffset, VBTableOffset, Flags, and returns a variable
// symbol whose name ends in the descriptor node. Only VBPtrOffset is parsed as
// a signed number.
//
// Error is checked once after all four numbers have been read; nullptr is
// returned if any of them failed.
// NOTE(review): the line holding the return type is not present in this
// listing.
438Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
439 std::string_view &MangledName) {
440 RttiBaseClassDescriptorNode *RBCDN =
441 Arena.alloc<RttiBaseClassDescriptorNode>();
442 RBCDN->NVOffset = demangleUnsigned(MangledName);
443 RBCDN->VBPtrOffset = demangleSigned(MangledName);
444 RBCDN->VBTableOffset = demangleUnsigned(MangledName);
445 RBCDN->Flags = demangleUnsigned(MangledName);
446 if (Error)
447 return nullptr;
448
449 VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
450 VSN->Name = demangleNameScopeChain(MangledName, RBCDN);
// The result is ignored: a missing trailing '8' is not reported as an error
// here.
451 consumeFront(MangledName, '8');
452 return VSN;
453}
454
// Demangles a dynamic initializer / dynamic atexit destructor stub.
// IsDestructor is stored on the identifier node unchanged.
//
// The stub wraps either a variable or a function. For a variable, the wrapped
// declarator is followed by one or two '@' and then a function encoding; for a
// function, the wrapped symbol itself is reused as the result. In both cases
// the resulting function is renamed to the structor identifier. Sets Error and
// returns nullptr on malformed input.
// NOTE(review): the line holding the return type is not present in this
// listing.
456Demangler::demangleInitFiniStub(std::string_view &MangledName,
457 bool IsDestructor) {
458 DynamicStructorIdentifierNode *DSIN =
459 Arena.alloc<DynamicStructorIdentifierNode>();
460 DSIN->IsDestructor = IsDestructor;
461
// A leading '?' marks the symbol as a static data member (see the comment on
// the older clang mangling below).
462 bool IsKnownStaticDataMember = false;
463 if (consumeFront(MangledName, '?'))
464 IsKnownStaticDataMember = true;
465
466 SymbolNode *Symbol = demangleDeclarator(MangledName);
467 if (Error)
468 return nullptr;
469
470 FunctionSymbolNode *FSN = nullptr;
471
472 if (Symbol->kind() == NodeKind::VariableSymbol) {
473 DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
474
475 // Older versions of clang mangled this type of symbol incorrectly. They
476 // would omit the leading ? and they would only emit a single @ at the end.
477 // The correct mangling is a leading ? and 2 trailing @ signs. Handle
478 // both cases.
479 int AtCount = IsKnownStaticDataMember ? 2 : 1;
480 for (int I = 0; I < AtCount; ++I) {
481 if (consumeFront(MangledName, '@'))
482 continue;
483 Error = true;
484 return nullptr;
485 }
486
// FSN may be null here; it is then returned as-is without a name.
487 FSN = demangleFunctionEncoding(MangledName);
488 if (FSN)
489 FSN->Name = synthesizeQualifiedName(Arena, DSIN);
490 } else {
491 if (IsKnownStaticDataMember) {
492 // This was supposed to be a static data member, but we got a function.
493 Error = true;
494 return nullptr;
495 }
496
// Keep the wrapped function's own name on the identifier before the function
// node is renamed to the structor identifier.
497 FSN = static_cast<FunctionSymbolNode *>(Symbol);
498 DSIN->Name = Symbol->Name;
499 FSN->Name = synthesizeQualifiedName(Arena, DSIN);
500 }
501
502 return FSN;
503}
504
// Dispatches on the special-intrinsic kind SIK to the matching demangle
// routine. Every path that leaves the switch through `break` ends in the
// common tail, which sets Error and returns nullptr.
// NOTE(review): the statement that computes SIK and all case labels of the
// switch are not present in this listing (the embedded numbering skips them);
// restore them from the upstream file.
505SymbolNode *Demangler::demangleSpecialIntrinsic(std::string_view &MangledName) {
507
508 switch (SIK) {
510 return nullptr;
512 return demangleStringLiteral(MangledName);
517 return demangleSpecialTableSymbolNode(MangledName, SIK);
519 return demangleVcallThunkNode(MangledName);
521 return demangleLocalStaticGuard(MangledName, /*IsThread=*/false);
523 return demangleLocalStaticGuard(MangledName, /*IsThread=*/true);
// RTTI type descriptor: a type, then "@8", and nothing may follow.
525 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
526 if (Error)
527 break;
528 if (!consumeFront(MangledName, "@8"))
529 break;
530 if (!MangledName.empty())
531 break;
532 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor'");
533 }
535 return demangleUntypedVariable(Arena, MangledName,
536 "`RTTI Base Class Array'");
538 return demangleUntypedVariable(Arena, MangledName,
539 "`RTTI Class Hierarchy Descriptor'");
541 return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
543 return demangleInitFiniStub(MangledName, /*IsDestructor=*/false);
545 return demangleInitFiniStub(MangledName, /*IsDestructor=*/true);
548 // It's unclear which tools produces these manglings, so demangling
549 // support is not (yet?) implemented.
550 break;
552 DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind.
553 }
554 Error = true;
555 return nullptr;
556}
557
// Entry point for a function identifier code. MangledName must start with '?'
// (asserted); that character is dropped, and empty remaining input sets Error
// and returns nullptr. The code group is then chosen from the prefix: "__" is
// tested before "_" so that the longer prefix wins, and neither matching falls
// through to the last call.
// NOTE(review): the return type line and the group argument of each of the
// three calls are not present in this listing; restore them from the upstream
// file.
559Demangler::demangleFunctionIdentifierCode(std::string_view &MangledName) {
560 assert(llvm::itanium_demangle::starts_with(MangledName, '?'));
561 MangledName.remove_prefix(1);
562 if (MangledName.empty()) {
563 Error = true;
564 return nullptr;
565 }
566
567 if (consumeFront(MangledName, "__"))
568 return demangleFunctionIdentifierCode(
570 if (consumeFront(MangledName, "_"))
571 return demangleFunctionIdentifierCode(MangledName,
573 return demangleFunctionIdentifierCode(MangledName,
575}
576
// Allocates a structor identifier node and records whether it names a
// destructor. MangledName is not read or consumed here.
// NOTE(review): the line holding the return type is not present in this
// listing.
578Demangler::demangleStructorIdentifier(std::string_view &MangledName,
579 bool IsDestructor) {
580 StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>();
581 N->IsDestructor = IsDestructor;
582 return N;
583}
584
// Allocates an empty conversion operator identifier node. MangledName is not
// read here; demangleEncodedSymbol assigns TargetType later, from the return
// type of the function signature.
// NOTE(review): the line holding the return type is not present in this
// listing.
586Demangler::demangleConversionOperatorIdentifier(std::string_view &MangledName) {
587 ConversionOperatorIdentifierNode *N =
588 Arena.alloc<ConversionOperatorIdentifierNode>();
589 return N;
590}
591
// Builds the identifier for a literal operator (operator ""_name): the name is
// read as a simple string and is not memorized as a back-reference.
// NOTE(review): the line holding the return type is not present in this
// listing.
593Demangler::demangleLiteralOperatorIdentifier(std::string_view &MangledName) {
594 LiteralOperatorIdentifierNode *N =
595 Arena.alloc<LiteralOperatorIdentifierNode>();
596 N->Name = demangleSimpleString(MangledName, /*Memorize=*/false);
597 return N;
598}
599
// Maps an operator code character CH to an IntrinsicFunctionKind.
//
// CH must be '0'-'9' or 'A'-'Z'; any other character sets Error and returns
// IFK::None. Three 36-entry tables exist, one per code group (no underscore,
// one underscore, two underscores, as the per-entry comments show); Group
// selects the table and CH selects the entry: digits map to indices 0-9 and
// letters to indices 10-35.
// NOTE(review): the return type line, the second parameter (Group), the case
// labels of the final switch and the statement after it are not present in
// this listing; restore them from the upstream file.
601Demangler::translateIntrinsicFunctionCode(char CH,
603 using IFK = IntrinsicFunctionKind;
604 if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) {
605 Error = true;
606 return IFK::None;
607 }
608
609 // Not all ? identifiers are intrinsics *functions*. This function only maps
610 // operator codes for the special functions, all others are handled elsewhere,
611 // hence the IFK::None entries in the table.
612 static IFK Basic[36] = {
613 IFK::None, // ?0 # Foo::Foo()
614 IFK::None, // ?1 # Foo::~Foo()
615 IFK::New, // ?2 # operator new
616 IFK::Delete, // ?3 # operator delete
617 IFK::Assign, // ?4 # operator=
618 IFK::RightShift, // ?5 # operator>>
619 IFK::LeftShift, // ?6 # operator<<
620 IFK::LogicalNot, // ?7 # operator!
621 IFK::Equals, // ?8 # operator==
622 IFK::NotEquals, // ?9 # operator!=
623 IFK::ArraySubscript, // ?A # operator[]
624 IFK::None, // ?B # Foo::operator <type>()
625 IFK::Pointer, // ?C # operator->
626 IFK::Dereference, // ?D # operator*
627 IFK::Increment, // ?E # operator++
628 IFK::Decrement, // ?F # operator--
629 IFK::Minus, // ?G # operator-
630 IFK::Plus, // ?H # operator+
631 IFK::BitwiseAnd, // ?I # operator&
632 IFK::MemberPointer, // ?J # operator->*
633 IFK::Divide, // ?K # operator/
634 IFK::Modulus, // ?L # operator%
635 IFK::LessThan, // ?M operator<
636 IFK::LessThanEqual, // ?N operator<=
637 IFK::GreaterThan, // ?O operator>
638 IFK::GreaterThanEqual, // ?P operator>=
639 IFK::Comma, // ?Q operator,
640 IFK::Parens, // ?R operator()
641 IFK::BitwiseNot, // ?S operator~
642 IFK::BitwiseXor, // ?T operator^
643 IFK::BitwiseOr, // ?U operator|
644 IFK::LogicalAnd, // ?V operator&&
645 IFK::LogicalOr, // ?W operator||
646 IFK::TimesEqual, // ?X operator*=
647 IFK::PlusEqual, // ?Y operator+=
648 IFK::MinusEqual, // ?Z operator-=
649 };
650 static IFK Under[36] = {
651 IFK::DivEqual, // ?_0 operator/=
652 IFK::ModEqual, // ?_1 operator%=
653 IFK::RshEqual, // ?_2 operator>>=
654 IFK::LshEqual, // ?_3 operator<<=
655 IFK::BitwiseAndEqual, // ?_4 operator&=
656 IFK::BitwiseOrEqual, // ?_5 operator|=
657 IFK::BitwiseXorEqual, // ?_6 operator^=
658 IFK::None, // ?_7 # vftable
659 IFK::None, // ?_8 # vbtable
660 IFK::None, // ?_9 # vcall
661 IFK::None, // ?_A # typeof
662 IFK::None, // ?_B # local static guard
663 IFK::None, // ?_C # string literal
664 IFK::VbaseDtor, // ?_D # vbase destructor
665 IFK::VecDelDtor, // ?_E # vector deleting destructor
666 IFK::DefaultCtorClosure, // ?_F # default constructor closure
667 IFK::ScalarDelDtor, // ?_G # scalar deleting destructor
668 IFK::VecCtorIter, // ?_H # vector constructor iterator
669 IFK::VecDtorIter, // ?_I # vector destructor iterator
670 IFK::VecVbaseCtorIter, // ?_J # vector vbase constructor iterator
671 IFK::VdispMap, // ?_K # virtual displacement map
672 IFK::EHVecCtorIter, // ?_L # eh vector constructor iterator
673 IFK::EHVecDtorIter, // ?_M # eh vector destructor iterator
674 IFK::EHVecVbaseCtorIter, // ?_N # eh vector vbase constructor iterator
675 IFK::CopyCtorClosure, // ?_O # copy constructor closure
676 IFK::None, // ?_P<name> # udt returning <name>
677 IFK::None, // ?_Q # <unknown>
678 IFK::None, // ?_R0 - ?_R4 # RTTI Codes
679 IFK::None, // ?_S # local vftable
680 IFK::LocalVftableCtorClosure, // ?_T # local vftable constructor closure
681 IFK::ArrayNew, // ?_U operator new[]
682 IFK::ArrayDelete, // ?_V operator delete[]
683 IFK::None, // ?_W <unused>
684 IFK::None, // ?_X <unused>
685 IFK::None, // ?_Y <unused>
686 IFK::None, // ?_Z <unused>
687 };
688 static IFK DoubleUnder[36] = {
689 IFK::None, // ?__0 <unused>
690 IFK::None, // ?__1 <unused>
691 IFK::None, // ?__2 <unused>
692 IFK::None, // ?__3 <unused>
693 IFK::None, // ?__4 <unused>
694 IFK::None, // ?__5 <unused>
695 IFK::None, // ?__6 <unused>
696 IFK::None, // ?__7 <unused>
697 IFK::None, // ?__8 <unused>
698 IFK::None, // ?__9 <unused>
699 IFK::ManVectorCtorIter, // ?__A managed vector ctor iterator
700 IFK::ManVectorDtorIter, // ?__B managed vector dtor iterator
701 IFK::EHVectorCopyCtorIter, // ?__C EH vector copy ctor iterator
702 IFK::EHVectorVbaseCopyCtorIter, // ?__D EH vector vbase copy ctor iter
703 IFK::None, // ?__E dynamic initializer for `T'
704 IFK::None, // ?__F dynamic atexit destructor for `T'
705 IFK::VectorCopyCtorIter, // ?__G vector copy constructor iter
706 IFK::VectorVbaseCopyCtorIter, // ?__H vector vbase copy ctor iter
707 IFK::ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy ctor
708 // iter
709 IFK::None, // ?__J local static thread guard
710 IFK::None, // ?__K operator ""_name
711 IFK::CoAwait, // ?__L operator co_await
712 IFK::Spaceship, // ?__M operator<=>
713 IFK::None, // ?__N <unused>
714 IFK::None, // ?__O <unused>
715 IFK::None, // ?__P <unused>
716 IFK::None, // ?__Q <unused>
717 IFK::None, // ?__R <unused>
718 IFK::None, // ?__S <unused>
719 IFK::None, // ?__T <unused>
720 IFK::None, // ?__U <unused>
721 IFK::None, // ?__V <unused>
722 IFK::None, // ?__W <unused>
723 IFK::None, // ?__X <unused>
724 IFK::None, // ?__Y <unused>
725 IFK::None, // ?__Z <unused>
726 };
727
// The range check at the top guarantees 0 <= Index <= 35 for every table.
728 int Index = (CH >= '0' && CH <= '9') ? (CH - '0') : (CH - 'A' + 10);
729 switch (Group) {
731 return Basic[Index];
733 return Under[Index];
735 return DoubleUnder[Index];
736 }
738}
739
// Demangles a single function identifier code character within the given code
// group. Empty input sets Error and returns nullptr. The character CH is
// consumed in every group; a few codes get dedicated identifier nodes
// ('0'/'1' structors and 'B' conversion operator in the first group shown,
// 'K' literal operator in the last), and every other code becomes an
// IntrinsicFunctionIdentifierNode via translateIntrinsicFunctionCode.
// NOTE(review): the return type line, the second parameter (Group), the case
// labels of the outer switch and the statement after it are not present in
// this listing; restore them from the upstream file.
741Demangler::demangleFunctionIdentifierCode(std::string_view &MangledName,
743 if (MangledName.empty()) {
744 Error = true;
745 return nullptr;
746 }
747 const char CH = MangledName.front();
748 switch (Group) {
750 MangledName.remove_prefix(1);
751 switch (CH) {
752 case '0':
753 case '1':
// '1' denotes the destructor, '0' the constructor.
754 return demangleStructorIdentifier(MangledName, CH == '1');
755 case 'B':
756 return demangleConversionOperatorIdentifier(MangledName);
757 default:
758 return Arena.alloc<IntrinsicFunctionIdentifierNode>(
759 translateIntrinsicFunctionCode(CH, Group));
760 }
762 MangledName.remove_prefix(1);
763 return Arena.alloc<IntrinsicFunctionIdentifierNode>(
764 translateIntrinsicFunctionCode(CH, Group));
766 MangledName.remove_prefix(1);
767 switch (CH) {
768 case 'K':
769 return demangleLiteralOperatorIdentifier(MangledName);
770 default:
771 return Arena.alloc<IntrinsicFunctionIdentifierNode>(
772 translateIntrinsicFunctionCode(CH, Group));
773 }
774 }
775
777}
778
// Demangles the part of a symbol that follows its qualified name. A leading
// '0'-'4' is a variable storage class and yields a variable; anything else is
// parsed as a function encoding. Empty input sets Error and returns nullptr.
//
// For a conversion operator, the target type is not part of the name; it is
// copied from the return type of the parsed function signature.
779SymbolNode *Demangler::demangleEncodedSymbol(std::string_view &MangledName,
780 QualifiedNameNode *Name) {
781 if (MangledName.empty()) {
782 Error = true;
783 return nullptr;
784 }
785
786 // Read a variable.
787 switch (MangledName.front()) {
788 case '0':
789 case '1':
790 case '2':
791 case '3':
792 case '4': {
793 StorageClass SC = demangleVariableStorageClass(MangledName);
794 return demangleVariableEncoding(MangledName, SC);
795 }
796 }
797 FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
798
799 IdentifierNode *UQN = Name->getUnqualifiedIdentifier();
// NOTE(review): the condition that opens the block below is not present in
// this listing (the embedded numbering skips a line); restore it from the
// upstream file.
801 ConversionOperatorIdentifierNode *COIN =
802 static_cast<ConversionOperatorIdentifierNode *>(UQN);
// FSN may be null; TargetType is then left unset.
803 if (FSN)
804 COIN->TargetType = FSN->Signature->ReturnType;
805 }
806 return FSN;
807}
808
// Demangles a complete declarator: the fully qualified symbol name followed by
// its variable or function encoding. The parsed name is attached to the
// resulting symbol. Returns nullptr whenever Error is set.
809SymbolNode *Demangler::demangleDeclarator(std::string_view &MangledName) {
810 // What follows is a main symbol name. This may include namespaces or class
811 // back references.
812 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
813 if (Error)
814 return nullptr;
815
816 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
817 if (Error)
818 return nullptr;
819 Symbol->Name = QN;
820
821 IdentifierNode *UQN = QN->getUnqualifiedIdentifier();
// NOTE(review): the condition that opens the block below is not present in
// this listing (the embedded numbering skips a line); restore it from the
// upstream file.
823 ConversionOperatorIdentifierNode *COIN =
824 static_cast<ConversionOperatorIdentifierNode *>(UQN);
// A conversion operator whose target type was never filled in is rejected.
825 if (!COIN->TargetType) {
826 Error = true;
827 return nullptr;
828 }
829 }
830 return Symbol;
831}
832
833SymbolNode *Demangler::demangleMD5Name(std::string_view &MangledName) {
834 assert(llvm::itanium_demangle::starts_with(MangledName, "??@"));
835 // This is an MD5 mangled name. We can't demangle it, just return the
836 // mangled name.
837 // An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
838 size_t MD5Last = MangledName.find('@', strlen("??@"));
839 if (MD5Last == std::string_view::npos) {
840 Error = true;
841 return nullptr;
842 }
843 const char *Start = MangledName.data();
844 const size_t StartSize = MangledName.size();
845 MangledName.remove_prefix(MD5Last + 1);
846
847 // There are two additional special cases for MD5 names:
848 // 1. For complete object locators where the object name is long enough
849 // for the object to have an MD5 name, the complete object locator is
850 // called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual
851 // leading "??_R4". This is handled here.
852 // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after
853 // 2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8
854 // instead of_CT??@...@8 with just one MD5 name. Since we don't yet
855 // demangle catchable types anywhere, this isn't handled for MD5 names
856 // either.
857 consumeFront(MangledName, "??_R4@");
858
859 assert(MangledName.size() < StartSize);
860 const size_t Count = StartSize - MangledName.size();
861 std::string_view MD5(Start, Count);
862 SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
863 S->Name = synthesizeQualifiedName(Arena, MD5);
864
865 return S;
866}
867
868SymbolNode *Demangler::demangleTypeinfoName(std::string_view &MangledName) {
869 assert(llvm::itanium_demangle::starts_with(MangledName, '.'));
870 consumeFront(MangledName, '.');
871
872 TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
873 if (Error || !MangledName.empty()) {
874 Error = true;
875 return nullptr;
876 }
877 return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'");
878}
879
880// Parser entry point.
881SymbolNode *Demangler::parse(std::string_view &MangledName) {
882 // Typeinfo names are strings stored in RTTI data. They're not symbol names.
883 // It's still useful to demangle them. They're the only demangled entity
884 // that doesn't start with a "?" but a ".".
885 if (llvm::itanium_demangle::starts_with(MangledName, '.'))
886 return demangleTypeinfoName(MangledName);
887
888 if (llvm::itanium_demangle::starts_with(MangledName, "??@"))
889 return demangleMD5Name(MangledName);
890
891 // MSVC-style mangled symbols must start with '?'.
892 if (!llvm::itanium_demangle::starts_with(MangledName, '?')) {
893 Error = true;
894 return nullptr;
895 }
896
897 consumeFront(MangledName, '?');
898
899 // ?$ is a template instantiation, but all other names that start with ? are
900 // operators / special names.
901 if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName))
902 return SI;
903
904 return demangleDeclarator(MangledName);
905}
906
907TagTypeNode *Demangler::parseTagUniqueName(std::string_view &MangledName) {
908 if (!consumeFront(MangledName, ".?A")) {
909 Error = true;
910 return nullptr;
911 }
912 consumeFront(MangledName, ".?A");
913 if (MangledName.empty()) {
914 Error = true;
915 return nullptr;
916 }
917
918 return demangleClassType(MangledName);
919}
920
921// <type-encoding> ::= <storage-class> <variable-type>
922// <storage-class> ::= 0 # private static member
923// ::= 1 # protected static member
924// ::= 2 # public static member
925// ::= 3 # global
926// ::= 4 # static local
927
// Demangles the type and trailing qualifiers of a variable whose storage class
// SC has already been read, filling in the variable node VSN. Returns nullptr
// when parsing the type sets Error.
// NOTE(review): the return type line, the statement that creates VSN and the
// case label that opens the pointer branch of the switch are not present in
// this listing; restore them from the upstream file.
929Demangler::demangleVariableEncoding(std::string_view &MangledName,
930 StorageClass SC) {
932
933 VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
934 VSN->SC = SC;
935
936 if (Error)
937 return nullptr;
938
939 // <variable-type> ::= <type> <cvr-qualifiers>
940 // ::= <type> <pointee-cvr-qualifiers> # pointers, references
941 switch (VSN->Type->kind()) {
943 PointerTypeNode *PTN = static_cast<PointerTypeNode *>(VSN->Type);
944
// Extended qualifiers are merged into the pointer itself; the qualifiers read
// after them are merged into the pointee further down.
945 Qualifiers ExtraChildQuals = Q_None;
946 PTN->Quals = Qualifiers(VSN->Type->Quals |
947 demanglePointerExtQualifiers(MangledName));
948
949 bool IsMember = false;
950 std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
951
// For a member pointer a qualified type name follows; it is parsed so that it
// is consumed, and the result is discarded.
952 if (PTN->ClassParent) {
953 QualifiedNameNode *BackRefName =
954 demangleFullyQualifiedTypeName(MangledName);
955 (void)BackRefName;
956 }
957 PTN->Pointee->Quals = Qualifiers(PTN->Pointee->Quals | ExtraChildQuals);
958
959 break;
960 }
961 default:
// Non-pointer types: the trailing qualifiers replace the type's qualifiers.
962 VSN->Type->Quals = demangleQualifiers(MangledName).first;
963 break;
964 }
965
966 return VSN;
967}
968
969// Sometimes numbers are encoded in mangled symbols. For example,
970// "int (*x)[20]" is a valid C type (x is a pointer to an array of
971// length 20), so we need some way to embed numbers as part of symbols.
972// This function parses it.
973//
974// <number> ::= [?] <non-negative integer>
975//
976// <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10
977// ::= <hex digit>+ @ # when Number == 0 or >= 10
978//
979// <hex-digit> ::= [A-P] # A = 0, B = 1, ...
980std::pair<uint64_t, bool>
981Demangler::demangleNumber(std::string_view &MangledName) {
982 bool IsNegative = consumeFront(MangledName, '?');
983
984 if (startsWithDigit(MangledName)) {
985 uint64_t Ret = MangledName[0] - '0' + 1;
986 MangledName.remove_prefix(1);
987 return {Ret, IsNegative};
988 }
989
990 uint64_t Ret = 0;
991 for (size_t i = 0; i < MangledName.size(); ++i) {
992 char C = MangledName[i];
993 if (C == '@') {
994 MangledName.remove_prefix(i + 1);
995 return {Ret, IsNegative};
996 }
997 if ('A' <= C && C <= 'P') {
998 Ret = (Ret << 4) + (C - 'A');
999 continue;
1000 }
1001 break;
1002 }
1003
1004 Error = true;
1005 return {0ULL, false};
1006}
1007
1008uint64_t Demangler::demangleUnsigned(std::string_view &MangledName) {
1009 bool IsNegative = false;
1010 uint64_t Number = 0;
1011 std::tie(Number, IsNegative) = demangleNumber(MangledName);
1012 if (IsNegative)
1013 Error = true;
1014 return Number;
1015}
1016
1017int64_t Demangler::demangleSigned(std::string_view &MangledName) {
1018 bool IsNegative = false;
1019 uint64_t Number = 0;
1020 std::tie(Number, IsNegative) = demangleNumber(MangledName);
1021 if (Number > INT64_MAX)
1022 Error = true;
1023 int64_t I = static_cast<int64_t>(Number);
1024 return IsNegative ? -I : I;
1025}
1026
1027// First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
1028// Memorize it.
1029void Demangler::memorizeString(std::string_view S) {
1030 if (Backrefs.NamesCount >= BackrefContext::Max)
1031 return;
1032 for (size_t i = 0; i < Backrefs.NamesCount; ++i)
1033 if (S == Backrefs.Names[i]->Name)
1034 return;
1035 NamedIdentifierNode *N = Arena.alloc<NamedIdentifierNode>();
1036 N->Name = S;
1037 Backrefs.Names[Backrefs.NamesCount++] = N;
1038}
1039
// Resolves a name back-reference: the leading decimal digit is an index into
// the names memorized so far. An index that has not been filled yet sets Error
// and returns nullptr without consuming the digit; otherwise the digit is
// consumed and the stored node is returned.
// NOTE(review): the line holding the return type is not present in this
// listing.
1041Demangler::demangleBackRefName(std::string_view &MangledName) {
1042 assert(startsWithDigit(MangledName));
1043
1044 size_t I = MangledName[0] - '0';
1045 if (I >= Backrefs.NamesCount) {
1046 Error = true;
1047 return nullptr;
1048 }
1049
1050 MangledName.remove_prefix(1);
1051 return Backrefs.Names[I];
1052}
1053
1054void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
1055 // Render this class template name into a string buffer so that we can
1056 // memorize it for the purpose of back-referencing.
1057 OutputBuffer OB;
1058 Identifier->output(OB, OF_Default);
1059 std::string_view Owned = copyString(OB);
1060 memorizeString(Owned);
1061 std::free(OB.getBuffer());
1062}
1063
// Demangles a template instantiation name: "?$", an unqualified name, then the
// template parameter list. Returns nullptr when Error is set.
//
// The name and its parameters are parsed against a fresh, empty back-reference
// context; the outer context is swapped back in afterwards, whether or not
// parsing succeeded. When NBB requests it (NBB_Template), the finished
// identifier is memorized in the outer context.
// NOTE(review): the return type line and the condition that guards the
// rejection inside the NBB_Template block are not present in this listing;
// restore them from the upstream file.
1065Demangler::demangleTemplateInstantiationName(std::string_view &MangledName,
1066 NameBackrefBehavior NBB) {
1067 assert(llvm::itanium_demangle::starts_with(MangledName, "?$"));
1068 consumeFront(MangledName, "?$");
1069
// Park the outer back-references in OuterContext; Backrefs is now empty.
1070 BackrefContext OuterContext;
1071 std::swap(OuterContext, Backrefs);
1072
1073 IdentifierNode *Identifier =
1074 demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
1075 if (!Error)
1076 Identifier->TemplateParams = demangleTemplateParameterList(MangledName);
1077
// Restore the outer back-references before any early return.
1078 std::swap(OuterContext, Backrefs);
1079 if (Error)
1080 return nullptr;
1081
1082 if (NBB & NBB_Template) {
1083 // NBB_Template is only set for types and non-leaf names ("a::" in "a::b").
1084 // Structors and conversion operators only makes sense in a leaf name, so
1085 // reject them in NBB_Template contexts.
1088 Error = true;
1089 return nullptr;
1090 }
1091
1092 memorizeIdentifier(Identifier);
1093 }
1094
1095 return Identifier;
1096}
1097
1099Demangler::demangleSimpleName(std::string_view &MangledName, bool Memorize) {
1100 std::string_view S = demangleSimpleString(MangledName, Memorize);
1101 if (Error)
1102 return nullptr;
1103
1104 NamedIdentifierNode *Name = Arena.alloc<NamedIdentifierNode>();
1105 Name->Name = S;
1106 return Name;
1107}
1108
// A "rebased" hex digit is one of the 16 letters 'A' through 'P'.
static bool isRebasedHexDigit(char C) { return !(C < 'A' || C > 'P'); }
1110
// Converts a rebased hex digit ('A'..'P') to its numeric value 0..15.
// The result is only meaningful for characters in that range.
static uint8_t rebasedHexDigitToNumber(char C) {
  assert(C >= 'A' && C <= 'P');
  return (C <= 'J') ? (C - 'A') : (10 + C - 'K');
}
1115
1116uint8_t Demangler::demangleCharLiteral(std::string_view &MangledName) {
1117 assert(!MangledName.empty());
1118 if (!llvm::itanium_demangle::starts_with(MangledName, '?')) {
1119 const uint8_t F = MangledName.front();
1120 MangledName.remove_prefix(1);
1121 return F;
1122 }
1123
1124 MangledName.remove_prefix(1);
1125 if (MangledName.empty())
1126 goto CharLiteralError;
1127
1128 if (consumeFront(MangledName, '$')) {
1129 // Two hex digits
1130 if (MangledName.size() < 2)
1131 goto CharLiteralError;
1132 std::string_view Nibbles = MangledName.substr(0, 2);
1133 if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1]))
1134 goto CharLiteralError;
1135 // Don't append the null terminator.
1136 uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]);
1137 uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]);
1138 MangledName.remove_prefix(2);
1139 return (C1 << 4) | C2;
1140 }
1141
1142 if (startsWithDigit(MangledName)) {
1143 const char *Lookup = ",/\\:. \n\t'-";
1144 char C = Lookup[MangledName[0] - '0'];
1145 MangledName.remove_prefix(1);
1146 return C;
1147 }
1148
1149 if (MangledName[0] >= 'a' && MangledName[0] <= 'z') {
1150 char Lookup[26] = {'\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
1151 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE',
1152 '\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5',
1153 '\xF6', '\xF7', '\xF8', '\xF9', '\xFA'};
1154 char C = Lookup[MangledName[0] - 'a'];
1155 MangledName.remove_prefix(1);
1156 return C;
1157 }
1158
1159 if (MangledName[0] >= 'A' && MangledName[0] <= 'Z') {
1160 char Lookup[26] = {'\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
1161 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE',
1162 '\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5',
1163 '\xD6', '\xD7', '\xD8', '\xD9', '\xDA'};
1164 char C = Lookup[MangledName[0] - 'A'];
1165 MangledName.remove_prefix(1);
1166 return C;
1167 }
1168
1169CharLiteralError:
1170 Error = true;
1171 return '\0';
1172}
1173
1174wchar_t Demangler::demangleWcharLiteral(std::string_view &MangledName) {
1175 uint8_t C1, C2;
1176
1177 C1 = demangleCharLiteral(MangledName);
1178 if (Error || MangledName.empty())
1179 goto WCharLiteralError;
1180 C2 = demangleCharLiteral(MangledName);
1181 if (Error)
1182 goto WCharLiteralError;
1183
1184 return ((wchar_t)C1 << 8) | (wchar_t)C2;
1185
1186WCharLiteralError:
1187 Error = true;
1188 return L'\0';
1189}
1190
// Stores the uppercase hexadecimal character for Digit (0..15) into *Buffer.
static void writeHexDigit(char *Buffer, uint8_t Digit) {
  assert(Digit <= 15);
  if (Digit < 10)
    *Buffer = '0' + Digit;
  else
    *Buffer = 'A' + Digit - 10;
}
1195
1196static void outputHex(OutputBuffer &OB, unsigned C) {
1197 assert (C != 0);
1198
1199 // It's easier to do the math if we can work from right to left, but we need
1200 // to print the numbers from left to right. So render this into a temporary
1201 // buffer first, then output the temporary buffer. Each byte is of the form
1202 // \xAB, which means that each byte needs 4 characters. Since there are at
1203 // most 4 bytes, we need a 4*4+1 = 17 character temporary buffer.
1204 char TempBuffer[17];
1205
1206 ::memset(TempBuffer, 0, sizeof(TempBuffer));
1207 constexpr int MaxPos = sizeof(TempBuffer) - 1;
1208
1209 int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0.
1210 while (C != 0) {
1211 for (int I = 0; I < 2; ++I) {
1212 writeHexDigit(&TempBuffer[Pos--], C % 16);
1213 C /= 16;
1214 }
1215 }
1216 TempBuffer[Pos--] = 'x';
1217 assert(Pos >= 0);
1218 TempBuffer[Pos--] = '\\';
1219 OB << std::string_view(&TempBuffer[Pos + 1]);
1220}
1221
1222static void outputEscapedChar(OutputBuffer &OB, unsigned C) {
1223 switch (C) {
1224 case '\0': // nul
1225 OB << "\\0";
1226 return;
1227 case '\'': // single quote
1228 OB << "\\\'";
1229 return;
1230 case '\"': // double quote
1231 OB << "\\\"";
1232 return;
1233 case '\\': // backslash
1234 OB << "\\\\";
1235 return;
1236 case '\a': // bell
1237 OB << "\\a";
1238 return;
1239 case '\b': // backspace
1240 OB << "\\b";
1241 return;
1242 case '\f': // form feed
1243 OB << "\\f";
1244 return;
1245 case '\n': // new line
1246 OB << "\\n";
1247 return;
1248 case '\r': // carriage return
1249 OB << "\\r";
1250 return;
1251 case '\t': // tab
1252 OB << "\\t";
1253 return;
1254 case '\v': // vertical tab
1255 OB << "\\v";
1256 return;
1257 default:
1258 break;
1259 }
1260
1261 if (C > 0x1F && C < 0x7F) {
1262 // Standard ascii char.
1263 OB << (char)C;
1264 return;
1265 }
1266
1267 outputHex(OB, C);
1268}
1269
// Returns the number of consecutive zero bytes at the end of the first
// Length bytes of StringBytes. A non-positive Length yields 0.
static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) {
  unsigned Count = 0;
  // Index from the back rather than keeping an end pointer, so that no
  // pointer before the start of the buffer is ever formed (which happened for
  // Length == 0 and for all-zero input).
  while (Length > 0 && StringBytes[Length - 1] == 0) {
    --Length;
    ++Count;
  }
  return Count;
}
1280
// Counts every zero byte among the first Length bytes of StringBytes.
static unsigned countEmbeddedNulls(const uint8_t *StringBytes,
                                   unsigned Length) {
  unsigned Zeros = 0;
  const uint8_t *const End = StringBytes + Length;
  for (const uint8_t *Cur = StringBytes; Cur != End; ++Cur)
    Zeros += (*Cur == 0) ? 1u : 0u;
  return Zeros;
}
1290
1291// A mangled (non-wide) string literal stores the total length of the string it
1292// refers to (passed in NumBytes), and it contains up to 32 bytes of actual text
1293// (passed in StringBytes, NumChars).
1294static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
1295 uint64_t NumBytes) {
1296 assert(NumBytes > 0);
1297
1298 // If the number of bytes is odd, this is guaranteed to be a char string.
1299 if (NumBytes % 2 == 1)
1300 return 1;
1301
1302 // All strings can encode at most 32 bytes of data. If it's less than that,
1303 // then we encoded the entire string. In this case we check for a 1-byte,
1304 // 2-byte, or 4-byte null terminator.
1305 if (NumBytes < 32) {
1306 unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars);
1307 if (TrailingNulls >= 4 && NumBytes % 4 == 0)
1308 return 4;
1309 if (TrailingNulls >= 2)
1310 return 2;
1311 return 1;
1312 }
1313
1314 // The whole string was not able to be encoded. Try to look at embedded null
1315 // terminators to guess. The heuristic is that we count all embedded null
1316 // terminators. If more than 2/3 are null, it's a char32. If more than 1/3
1317 // are null, it's a char16. Otherwise it's a char8. This obviously isn't
1318 // perfect and is biased towards languages that have ascii alphabets, but this
1319 // was always going to be best effort since the encoding is lossy.
1320 unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars);
1321 if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0)
1322 return 4;
1323 if (Nulls >= NumChars / 3)
1324 return 2;
1325 return 1;
1326}
1327
// Reads the CharIndex-th character of width CharBytes (1, 2 or 4) from
// StringBytes, treating the bytes as little-endian (lowest byte first).
static unsigned decodeMultiByteChar(const uint8_t *StringBytes,
                                    unsigned CharIndex, unsigned CharBytes) {
  assert(CharBytes == 1 || CharBytes == 2 || CharBytes == 4);
  const uint8_t *const First = StringBytes + CharIndex * CharBytes;
  unsigned Value = 0;
  // Fold from the most significant (last) byte down to the first.
  for (unsigned I = CharBytes; I-- > 0;)
    Value = (Value << 8) | static_cast<unsigned>(First[I]);
  return Value;
}
1340
// Demangles a vcall thunk: a scope chain, the marker "$B", an unsigned
// vtable offset, the letter 'A', and a calling convention. Returns the new
// FunctionSymbolNode, or nullptr if any step set Error.
// NOTE(review): the return-type line that should precede this definition is
// absent from this copy; the function returns `FSN`, a `FunctionSymbolNode *`.
1342Demangler::demangleVcallThunkNode(std::string_view &MangledName) {
1343 FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>();
1344 VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>();
1345 FSN->Signature = Arena.alloc<ThunkSignatureNode>();
// NOTE(review): the listing skips line 1346 here, so one statement may have
// been lost in extraction — verify against upstream.
1347
// The thunk identifier is the innermost component of the qualified name.
1348 FSN->Name = demangleNameScopeChain(MangledName, VTIN);
// Each remaining step runs only while no error has been recorded.
1349 if (!Error)
1350 Error = !consumeFront(MangledName, "$B");
1351 if (!Error)
1352 VTIN->OffsetInVTable = demangleUnsigned(MangledName);
1353 if (!Error)
1354 Error = !consumeFront(MangledName, 'A');
1355 if (!Error)
1356 FSN->Signature->CallConvention = demangleCallingConvention(MangledName);
1357 return (Error) ? nullptr : FSN;
1358}
1359
// Demangles an encoded string literal: the prefix "@_", a character-type
// digit ('0' narrow, '1' wchar_t), the total byte size, a CRC field
// terminated by '@', and the escaped text terminated by '@'. On success
// returns a node holding the decoded, escaped text; on failure sets Error and
// returns nullptr. OB's buffer is freed on both paths.
// NOTE(review): the return-type line that should precede this definition is
// absent from this copy; the function returns `Result`, an
// `EncodedStringLiteralNode *`.
1361Demangler::demangleStringLiteral(std::string_view &MangledName) {
1362 // This function uses goto, so declare all variables up front.
1363 OutputBuffer OB;
1364 std::string_view CRC;
1365 uint64_t StringByteSize;
1366 bool IsWcharT = false;
1367 bool IsNegative = false;
1368 size_t CrcEndPos = 0;
1369 char F;
1370
1371 EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
1372
1373 // Prefix indicating the beginning of a string literal
1374 if (!consumeFront(MangledName, "@_"))
1375 goto StringLiteralError;
1376 if (MangledName.empty())
1377 goto StringLiteralError;
1378
1379 // Char Type (regular or wchar_t)
1380 F = MangledName.front();
1381 MangledName.remove_prefix(1);
1382 switch (F) {
1383 case '1':
1384 IsWcharT = true;
// NOTE(review): the listing skips line 1385; as shown, case '1' falls through
// into case '0', which only breaks. Presumably a fall-through annotation was
// here — confirm.
1386 case '0':
1387 break;
1388 default:
1389 goto StringLiteralError;
1390 }
1391
1392 // Encoded Length
// The size must be non-negative and cover at least one character's worth of
// bytes (2 for wchar_t, 1 otherwise).
1393 std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName);
1394 if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1))
1395 goto StringLiteralError;
1396
1397 // CRC 32 (always 8 characters plus a terminator)
// The code itself does not check the CRC length: it takes everything up to
// the next '@'. CRC is extracted but not used further in this function.
1398 CrcEndPos = MangledName.find('@');
1399 if (CrcEndPos == std::string_view::npos)
1400 goto StringLiteralError;
1401 CRC = MangledName.substr(0, CrcEndPos);
1402 MangledName.remove_prefix(CrcEndPos + 1);
1403 if (MangledName.empty())
1404 goto StringLiteralError;
1405
1406 if (IsWcharT) {
1407 Result->Char = CharKind::Wchar;
1408 if (StringByteSize > 64)
1409 Result->IsTruncated = true;
1410
// StringByteSize counts down by 2 per decoded wide character.
1411 while (!consumeFront(MangledName, '@')) {
1412 // For a wide string StringByteSize has to have an even length.
1413 if (StringByteSize % 2 != 0)
1414 goto StringLiteralError;
1415 if (StringByteSize == 0)
1416 goto StringLiteralError;
1417 if (MangledName.size() < 2)
1418 goto StringLiteralError;
1419 wchar_t W = demangleWcharLiteral(MangledName);
// The character accounting for the last two bytes is not printed unless the
// string was truncated.
1420 if (StringByteSize != 2 || Result->IsTruncated)
1421 outputEscapedChar(OB, W);
1422 StringByteSize -= 2;
1423 if (Error)
1424 goto StringLiteralError;
1425 }
1426 } else {
1427 // The max byte length is actually 32, but some compilers mangled strings
1428 // incorrectly, so we have to assume it can go higher.
1429 constexpr unsigned MaxStringByteLength = 32 * 4;
1430 uint8_t StringBytes[MaxStringByteLength];
1431
// Decode raw bytes first; the character width is guessed afterwards.
// Error is not checked inside this loop.
1432 unsigned BytesDecoded = 0;
1433 while (!consumeFront(MangledName, '@')) {
1434 if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength)
1435 goto StringLiteralError;
1436 StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
1437 }
1438
1439 if (StringByteSize > BytesDecoded)
1440 Result->IsTruncated = true;
1441
1442 unsigned CharBytes =
1443 guessCharByteSize(StringBytes, BytesDecoded, StringByteSize);
1444 assert(StringByteSize % CharBytes == 0);
1445 switch (CharBytes) {
1446 case 1:
1447 Result->Char = CharKind::Char;
1448 break;
1449 case 2:
1450 Result->Char = CharKind::Char16;
1451 break;
1452 case 4:
1453 Result->Char = CharKind::Char32;
1454 break;
1455 default:
// NOTE(review): the listing skips line 1456, leaving `default:` with no
// statement. guessCharByteSize only returns 1, 2 or 4, so this was presumably
// an unreachable marker — confirm.
1457 }
// Print every complete character; the final one is skipped unless the string
// was truncated.
1458 const unsigned NumChars = BytesDecoded / CharBytes;
1459 for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) {
1460 unsigned NextChar =
1461 decodeMultiByteChar(StringBytes, CharIndex, CharBytes);
1462 if (CharIndex + 1 < NumChars || Result->IsTruncated)
1463 outputEscapedChar(OB, NextChar);
1464 }
1465 }
1466
1467 Result->DecodedString = copyString(OB);
1468 std::free(OB.getBuffer());
1469 return Result;
1470
1471StringLiteralError:
1472 Error = true;
1473 std::free(OB.getBuffer());
1474 return nullptr;
1475}
1476
1477// Returns MangledName's prefix before the first '@', or an error if
1478// MangledName contains no '@' or the prefix has length 0.
1479std::string_view Demangler::demangleSimpleString(std::string_view &MangledName,
1480 bool Memorize) {
1481 std::string_view S;
1482 for (size_t i = 0; i < MangledName.size(); ++i) {
1483 if (MangledName[i] != '@')
1484 continue;
1485 if (i == 0)
1486 break;
1487 S = MangledName.substr(0, i);
1488 MangledName.remove_prefix(i + 1);
1489
1490 if (Memorize)
1491 memorizeString(S);
1492 return S;
1493 }
1494
1495 Error = true;
1496 return {};
1497}
1498
1500Demangler::demangleAnonymousNamespaceName(std::string_view &MangledName) {
1501 assert(llvm::itanium_demangle::starts_with(MangledName, "?A"));
1502 consumeFront(MangledName, "?A");
1503
1504 NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>();
1505 Node->Name = "`anonymous namespace'";
1506 size_t EndPos = MangledName.find('@');
1507 if (EndPos == std::string_view::npos) {
1508 Error = true;
1509 return nullptr;
1510 }
1511 std::string_view NamespaceKey = MangledName.substr(0, EndPos);
1512 memorizeString(NamespaceKey);
1513 MangledName = MangledName.substr(EndPos + 1);
1514 return Node;
1515}
1516
1518Demangler::demangleLocallyScopedNamePiece(std::string_view &MangledName) {
1519 assert(startsWithLocalScopePattern(MangledName));
1520
1521 NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
1522 consumeFront(MangledName, '?');
1523 uint64_t Number = 0;
1524 bool IsNegative = false;
1525 std::tie(Number, IsNegative) = demangleNumber(MangledName);
1526 assert(!IsNegative);
1527
1528 // One ? to terminate the number
1529 consumeFront(MangledName, '?');
1530
1531 assert(!Error);
1532 Node *Scope = parse(MangledName);
1533 if (Error)
1534 return nullptr;
1535
1536 // Render the parent symbol's name into a buffer.
1537 OutputBuffer OB;
1538 OB << '`';
1539 Scope->output(OB, OF_Default);
1540 OB << '\'';
1541 OB << "::`" << Number << "'";
1542
1543 Identifier->Name = copyString(OB);
1544 std::free(OB.getBuffer());
1545 return Identifier;
1546}
1547
1548// Parses a type name in the form of A@B@C@@ which represents C::B::A.
1550Demangler::demangleFullyQualifiedTypeName(std::string_view &MangledName) {
1551 IdentifierNode *Identifier =
1552 demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
1553 if (Error)
1554 return nullptr;
1555 assert(Identifier);
1556
1557 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1558 if (Error)
1559 return nullptr;
1560 assert(QN);
1561 return QN;
1562}
1563
1564// Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
1565// Symbol names have slightly different rules regarding what can appear
1566// so we separate out the implementations for flexibility.
// Returns nullptr when Error is set.
// NOTE(review): the return-type line that should precede this definition is
// absent from this copy; the function returns `QN`, a `QualifiedNameNode *`.
1568Demangler::demangleFullyQualifiedSymbolName(std::string_view &MangledName) {
1569 // This is the final component of a symbol name (i.e. the leftmost component
1570 // of a mangled name). Since the only possible template instantiation that
1571 // can appear in this context is a function template, and since those are
1572 // not saved for the purposes of name backreferences, only backref simple
1573 // names.
1574 IdentifierNode *Identifier =
1575 demangleUnqualifiedSymbolName(MangledName, NBB_Simple);
1576 if (Error)
1577 return nullptr;
1578
1579 QualifiedNameNode *QN = demangleNameScopeChain(MangledName, Identifier);
1580 if (Error)
1581 return nullptr;
1582
// NOTE(review): the listing skips line 1583, so the `if (...) {` header that
// opens the block below is missing (the braces do not balance as shown).
// Judging by the cast below it tests whether Identifier is a structor —
// restore from upstream.
// A structor needs an enclosing class: the qualified name must have at least
// two components, and the second-to-last one is recorded as the class.
1584 if (QN->Components->Count < 2) {
1585 Error = true;
1586 return nullptr;
1587 }
1588 StructorIdentifierNode *SIN =
1589 static_cast<StructorIdentifierNode *>(Identifier);
1590 Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2];
1591 SIN->Class = static_cast<IdentifierNode *>(ClassNode);
1592 }
1593 assert(QN);
1594 return QN;
1595}
1596
1598Demangler::demangleUnqualifiedTypeName(std::string_view &MangledName,
1599 bool Memorize) {
1600 // An inner-most name can be a back-reference, because a fully-qualified name
1601 // (e.g. Scope + Inner) can contain other fully qualified names inside of
1602 // them (for example template parameters), and these nested parameters can
1603 // refer to previously mangled types.
1604 if (startsWithDigit(MangledName))
1605 return demangleBackRefName(MangledName);
1606
1607 if (llvm::itanium_demangle::starts_with(MangledName, "?$"))
1608 return demangleTemplateInstantiationName(MangledName, NBB_Template);
1609
1610 return demangleSimpleName(MangledName, Memorize);
1611}
1612
1614Demangler::demangleUnqualifiedSymbolName(std::string_view &MangledName,
1615 NameBackrefBehavior NBB) {
1616 if (startsWithDigit(MangledName))
1617 return demangleBackRefName(MangledName);
1618 if (llvm::itanium_demangle::starts_with(MangledName, "?$"))
1619 return demangleTemplateInstantiationName(MangledName, NBB);
1620 if (llvm::itanium_demangle::starts_with(MangledName, '?'))
1621 return demangleFunctionIdentifierCode(MangledName);
1622 return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0);
1623}
1624
1626Demangler::demangleNameScopePiece(std::string_view &MangledName) {
1627 if (startsWithDigit(MangledName))
1628 return demangleBackRefName(MangledName);
1629
1630 if (llvm::itanium_demangle::starts_with(MangledName, "?$"))
1631 return demangleTemplateInstantiationName(MangledName, NBB_Template);
1632
1633 if (llvm::itanium_demangle::starts_with(MangledName, "?A"))
1634 return demangleAnonymousNamespaceName(MangledName);
1635
1636 if (startsWithLocalScopePattern(MangledName))
1637 return demangleLocallyScopedNamePiece(MangledName);
1638
1639 return demangleSimpleName(MangledName, /*Memorize=*/true);
1640}
1641
1643Demangler::demangleNameScopeChain(std::string_view &MangledName,
1644 IdentifierNode *UnqualifiedName) {
1645 NodeList *Head = Arena.alloc<NodeList>();
1646
1647 Head->N = UnqualifiedName;
1648
1649 size_t Count = 1;
1650 while (!consumeFront(MangledName, "@")) {
1651 ++Count;
1652 NodeList *NewHead = Arena.alloc<NodeList>();
1653 NewHead->Next = Head;
1654 Head = NewHead;
1655
1656 if (MangledName.empty()) {
1657 Error = true;
1658 return nullptr;
1659 }
1660
1661 assert(!Error);
1662 IdentifierNode *Elem = demangleNameScopePiece(MangledName);
1663 if (Error)
1664 return nullptr;
1665
1666 Head->N = Elem;
1667 }
1668
1669 QualifiedNameNode *QN = Arena.alloc<QualifiedNameNode>();
1670 QN->Components = nodeListToNodeArray(Arena, Head, Count);
1671 return QN;
1672}
1673
// Consumes the function-class code at the front of MangledName and returns
// the matching FuncClass flags. An unrecognized code sets Error and returns
// FC_Public. front() is read unconditionally, so MangledName must be
// non-empty on entry.
// NOTE(review): the embedded listing numbers skip many lines in this function
// (1679, 1687-1695 odd, 1701-1711 odd, 1723-1727 odd, 1733). As shown, several
// case labels have no statement and would fall through to the next visible
// return, and `VFlag` is used without a visible declaration. Those lines were
// presumably lost in extraction — restore from upstream before relying on
// this copy.
1674FuncClass Demangler::demangleFunctionClass(std::string_view &MangledName) {
1675 const char F = MangledName.front();
1676 MangledName.remove_prefix(1);
1677 switch (F) {
1678 case '9':
1680 case 'A':
1681 return FC_Private;
1682 case 'B':
1683 return FuncClass(FC_Private | FC_Far);
1684 case 'C':
1685 return FuncClass(FC_Private | FC_Static);
1686 case 'D':
1688 case 'E':
1690 case 'F':
1692 case 'G':
1694 case 'H':
1696 case 'I':
1697 return FuncClass(FC_Protected);
1698 case 'J':
1699 return FuncClass(FC_Protected | FC_Far);
1700 case 'K':
1702 case 'L':
1704 case 'M':
1706 case 'N':
1708 case 'O':
1710 case 'P':
1712 case 'Q':
1713 return FuncClass(FC_Public);
1714 case 'R':
1715 return FuncClass(FC_Public | FC_Far);
1716 case 'S':
1717 return FuncClass(FC_Public | FC_Static);
1718 case 'T':
1719 return FuncClass(FC_Public | FC_Static | FC_Far);
1720 case 'U':
1721 return FuncClass(FC_Public | FC_Virtual);
1722 case 'V':
1724 case 'W':
1726 case 'X':
1728 case 'Y':
1729 return FuncClass(FC_Global);
1730 case 'Z':
1731 return FuncClass(FC_Global | FC_Far);
// '$' introduces a two-part code: an optional 'R' (adds
// FC_VirtualThisAdjustEx to VFlag) followed by a digit '0'..'5' that selects
// the access level and whether FC_Far is set.
1732 case '$': {
1734 if (consumeFront(MangledName, 'R'))
1735 VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
1736 if (MangledName.empty())
1737 break;
1738 const char F = MangledName.front();
1739 MangledName.remove_prefix(1);
1740 switch (F) {
1741 case '0':
1742 return FuncClass(FC_Private | FC_Virtual | VFlag);
1743 case '1':
1744 return FuncClass(FC_Private | FC_Virtual | VFlag | FC_Far);
1745 case '2':
1746 return FuncClass(FC_Protected | FC_Virtual | VFlag);
1747 case '3':
1748 return FuncClass(FC_Protected | FC_Virtual | VFlag | FC_Far);
1749 case '4':
1750 return FuncClass(FC_Public | FC_Virtual | VFlag);
1751 case '5':
1752 return FuncClass(FC_Public | FC_Virtual | VFlag | FC_Far);
1753 }
1754 }
1755 }
1756
// Reached for any code not handled above.
1757 Error = true;
1758 return FC_Public;
1759}
1760
// Consumes one calling-convention code letter and returns the matching
// CallingConv. Empty input sets Error and returns CallingConv::None; an
// unrecognized letter returns CallingConv::None without setting Error.
// NOTE(review): the return-type line that should precede this definition is
// absent from this copy (the function returns CallingConv values). The
// listing also skips lines 1793 and 1797, so 'Q' and 'W' have no visible
// return: as shown 'Q' would fall into 'S', and 'W' is a label with no
// statement. Restore from upstream.
1762Demangler::demangleCallingConvention(std::string_view &MangledName) {
1763 if (MangledName.empty()) {
1764 Error = true;
1765 return CallingConv::None;
1766 }
1767
1768 const char F = MangledName.front();
1769 MangledName.remove_prefix(1);
// Most conventions are accepted under two adjacent letters that map to the
// same result.
1770 switch (F) {
1771 case 'A':
1772 case 'B':
1773 return CallingConv::Cdecl;
1774 case 'C':
1775 case 'D':
1776 return CallingConv::Pascal;
1777 case 'E':
1778 case 'F':
1779 return CallingConv::Thiscall;
1780 case 'G':
1781 case 'H':
1782 return CallingConv::Stdcall;
1783 case 'I':
1784 case 'J':
1785 return CallingConv::Fastcall;
1786 case 'M':
1787 case 'N':
1788 return CallingConv::Clrcall;
1789 case 'O':
1790 case 'P':
1791 return CallingConv::Eabi;
1792 case 'Q':
1794 case 'S':
1795 return CallingConv::Swift;
1796 case 'W':
1798 }
1799
1800 return CallingConv::None;
1801}
1802
// Consumes the storage-class digit ('0'..'4') at the front of MangledName and
// returns the matching StorageClass.
// NOTE(review): the return-type line that should precede this definition is
// absent from this copy, and the listing skips lines 1811, 1813, 1815, 1819
// and 1821. As shown, only '3' has a visible return (StorageClass::Global),
// so the other returns and the statement after the switch were presumably
// lost in extraction — restore from upstream.
1804Demangler::demangleVariableStorageClass(std::string_view &MangledName) {
1805 assert(MangledName.front() >= '0' && MangledName.front() <= '4');
1806
1807 const char F = MangledName.front();
1808 MangledName.remove_prefix(1);
1809 switch (F) {
1810 case '0':
1812 case '1':
1814 case '2':
1816 case '3':
1817 return StorageClass::Global;
1818 case '4':
1820 }
1822}
1823
1824std::pair<Qualifiers, bool>
1825Demangler::demangleQualifiers(std::string_view &MangledName) {
1826 if (MangledName.empty()) {
1827 Error = true;
1828 return std::make_pair(Q_None, false);
1829 }
1830
1831 const char F = MangledName.front();
1832 MangledName.remove_prefix(1);
1833 switch (F) {
1834 // Member qualifiers
1835 case 'Q':
1836 return std::make_pair(Q_None, true);
1837 case 'R':
1838 return std::make_pair(Q_Const, true);
1839 case 'S':
1840 return std::make_pair(Q_Volatile, true);
1841 case 'T':
1842 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true);
1843 // Non-Member qualifiers
1844 case 'A':
1845 return std::make_pair(Q_None, false);
1846 case 'B':
1847 return std::make_pair(Q_Const, false);
1848 case 'C':
1849 return std::make_pair(Q_Volatile, false);
1850 case 'D':
1851 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false);
1852 }
1853 Error = true;
1854 return std::make_pair(Q_None, false);
1855}
1856
1857// <variable-type> ::= <type> <cvr-qualifiers>
1858// ::= <type> <pointee-cvr-qualifiers> # pointers, references
1859TypeNode *Demangler::demangleType(std::string_view &MangledName,
1860 QualifierMangleMode QMM) {
1861 Qualifiers Quals = Q_None;
1862 bool IsMember = false;
1863 if (QMM == QualifierMangleMode::Mangle) {
1864 std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1865 } else if (QMM == QualifierMangleMode::Result) {
1866 if (consumeFront(MangledName, '?'))
1867 std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1868 }
1869
1870 if (MangledName.empty()) {
1871 Error = true;
1872 return nullptr;
1873 }
1874
1875 TypeNode *Ty = nullptr;
1876 if (isTagType(MangledName))
1877 Ty = demangleClassType(MangledName);
1878 else if (isPointerType(MangledName)) {
1879 if (isMemberPointer(MangledName, Error))
1880 Ty = demangleMemberPointerType(MangledName);
1881 else if (!Error)
1882 Ty = demanglePointerType(MangledName);
1883 else
1884 return nullptr;
1885 } else if (isArrayType(MangledName))
1886 Ty = demangleArrayType(MangledName);
1887 else if (isFunctionType(MangledName)) {
1888 if (consumeFront(MangledName, "$$A8@@"))
1889 Ty = demangleFunctionType(MangledName, true);
1890 else {
1891 assert(llvm::itanium_demangle::starts_with(MangledName, "$$A6"));
1892 consumeFront(MangledName, "$$A6");
1893 Ty = demangleFunctionType(MangledName, false);
1894 }
1895 } else if (isCustomType(MangledName)) {
1896 Ty = demangleCustomType(MangledName);
1897 } else {
1898 Ty = demanglePrimitiveType(MangledName);
1899 }
1900
1901 if (!Ty || Error)
1902 return Ty;
1903 Ty->Quals = Qualifiers(Ty->Quals | Quals);
1904 return Ty;
1905}
1906
1907bool Demangler::demangleThrowSpecification(std::string_view &MangledName) {
1908 if (consumeFront(MangledName, "_E"))
1909 return true;
1910 if (consumeFront(MangledName, 'Z'))
1911 return false;
1912
1913 Error = true;
1914 return false;
1915}
1916
1918Demangler::demangleFunctionType(std::string_view &MangledName,
1919 bool HasThisQuals) {
1920 FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>();
1921
1922 if (HasThisQuals) {
1923 FTy->Quals = demanglePointerExtQualifiers(MangledName);
1924 FTy->RefQualifier = demangleFunctionRefQualifier(MangledName);
1925 FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
1926 }
1927
1928 // Fields that appear on both member and non-member functions.
1929 FTy->CallConvention = demangleCallingConvention(MangledName);
1930
1931 // <return-type> ::= <type>
1932 // ::= @ # structors (they have no declared return type)
1933 bool IsStructor = consumeFront(MangledName, '@');
1934 if (!IsStructor)
1935 FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
1936
1937 FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic);
1938
1939 FTy->IsNoexcept = demangleThrowSpecification(MangledName);
1940
1941 return FTy;
1942}
1943
1945Demangler::demangleFunctionEncoding(std::string_view &MangledName) {
1946 FuncClass ExtraFlags = FC_None;
1947 if (consumeFront(MangledName, "$$J0"))
1948 ExtraFlags = FC_ExternC;
1949
1950 if (MangledName.empty()) {
1951 Error = true;
1952 return nullptr;
1953 }
1954
1955 FuncClass FC = demangleFunctionClass(MangledName);
1956 FC = FuncClass(ExtraFlags | FC);
1957
1958 FunctionSignatureNode *FSN = nullptr;
1959 ThunkSignatureNode *TTN = nullptr;
1960 if (FC & FC_StaticThisAdjust) {
1961 TTN = Arena.alloc<ThunkSignatureNode>();
1962 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1963 } else if (FC & FC_VirtualThisAdjust) {
1964 TTN = Arena.alloc<ThunkSignatureNode>();
1965 if (FC & FC_VirtualThisAdjustEx) {
1966 TTN->ThisAdjust.VBPtrOffset = demangleSigned(MangledName);
1967 TTN->ThisAdjust.VBOffsetOffset = demangleSigned(MangledName);
1968 }
1969 TTN->ThisAdjust.VtordispOffset = demangleSigned(MangledName);
1970 TTN->ThisAdjust.StaticOffset = demangleSigned(MangledName);
1971 }
1972
1973 if (FC & FC_NoParameterList) {
1974 // This is an extern "C" function whose full signature hasn't been mangled.
1975 // This happens when we need to mangle a local symbol inside of an extern
1976 // "C" function.
1977 FSN = Arena.alloc<FunctionSignatureNode>();
1978 } else {
1979 bool HasThisQuals = !(FC & (FC_Global | FC_Static));
1980 FSN = demangleFunctionType(MangledName, HasThisQuals);
1981 }
1982
1983 if (Error)
1984 return nullptr;
1985
1986 if (TTN) {
1987 *static_cast<FunctionSignatureNode *>(TTN) = *FSN;
1988 FSN = TTN;
1989 }
1990 FSN->FunctionClass = FC;
1991
1992 FunctionSymbolNode *Symbol = Arena.alloc<FunctionSymbolNode>();
1993 Symbol->Signature = FSN;
1994 return Symbol;
1995}
1996
1997CustomTypeNode *Demangler::demangleCustomType(std::string_view &MangledName) {
1998 assert(llvm::itanium_demangle::starts_with(MangledName, '?'));
1999 MangledName.remove_prefix(1);
2000
2001 CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
2002 CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
2003 if (!consumeFront(MangledName, '@'))
2004 Error = true;
2005 if (Error)
2006 return nullptr;
2007 return CTN;
2008}
2009
2010// Reads a primitive type.
2012Demangler::demanglePrimitiveType(std::string_view &MangledName) {
2013 if (consumeFront(MangledName, "$$T"))
2014 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr);
2015
2016 const char F = MangledName.front();
2017 MangledName.remove_prefix(1);
2018 switch (F) {
2019 case 'X':
2020 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void);
2021 case 'D':
2022 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char);
2023 case 'C':
2024 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Schar);
2025 case 'E':
2026 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uchar);
2027 case 'F':
2028 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Short);
2029 case 'G':
2030 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ushort);
2031 case 'H':
2032 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int);
2033 case 'I':
2034 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint);
2035 case 'J':
2036 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Long);
2037 case 'K':
2038 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ulong);
2039 case 'M':
2040 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Float);
2041 case 'N':
2042 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Double);
2043 case 'O':
2044 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Ldouble);
2045 case '_': {
2046 if (MangledName.empty()) {
2047 Error = true;
2048 return nullptr;
2049 }
2050 const char F = MangledName.front();
2051 MangledName.remove_prefix(1);
2052 switch (F) {
2053 case 'N':
2054 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool);
2055 case 'J':
2056 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Int64);
2057 case 'K':
2058 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64);
2059 case 'W':
2060 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar);
2061 case 'Q':
2062 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8);
2063 case 'S':
2064 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
2065 case 'U':
2066 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32);
2067 case 'P':
2068 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Auto);
2069 case 'T':
2070 return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::DecltypeAuto);
2071 }
2072 break;
2073 }
2074 }
2075 Error = true;
2076 return nullptr;
2077}
2078
2079TagTypeNode *Demangler::demangleClassType(std::string_view &MangledName) {
2080 TagTypeNode *TT = nullptr;
2081
2082 const char F = MangledName.front();
2083 MangledName.remove_prefix(1);
2084 switch (F) {
2085 case 'T':
2086 TT = Arena.alloc<TagTypeNode>(TagKind::Union);
2087 break;
2088 case 'U':
2089 TT = Arena.alloc<TagTypeNode>(TagKind::Struct);
2090 break;
2091 case 'V':
2092 TT = Arena.alloc<TagTypeNode>(TagKind::Class);
2093 break;
2094 case 'W':
2095 if (!consumeFront(MangledName, '4')) {
2096 Error = true;
2097 return nullptr;
2098 }
2099 TT = Arena.alloc<TagTypeNode>(TagKind::Enum);
2100 break;
2101 default:
2102 assert(false);
2103 }
2104
2105 TT->QualifiedName = demangleFullyQualifiedTypeName(MangledName);
2106 return TT;
2107}
2108
2109// <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
2110// # the E is required for 64-bit non-static pointers
2111PointerTypeNode *Demangler::demanglePointerType(std::string_view &MangledName) {
2112 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2113
2114 std::tie(Pointer->Quals, Pointer->Affinity) =
2115 demanglePointerCVQualifiers(MangledName);
2116
2117 if (consumeFront(MangledName, "6")) {
2118 Pointer->Pointee = demangleFunctionType(MangledName, false);
2119 return Pointer;
2120 }
2121
2122 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2123 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2124
2125 Pointer->PointerAuthQualifier = createPointerAuthQualifier(MangledName);
2126
2127 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle);
2128 return Pointer;
2129}
2130
// Demangles a pointer-to-member type: pointer cv-qualifiers, ext-qualifiers,
// then either '8' plus class name plus member-function signature, or pointee
// qualifiers plus class name plus data-member type. Always returns the
// allocated node; failures are reported through Error.
// NOTE(review): the return-type line that should precede this definition is
// absent from this copy; the function returns `Pointer`, a
// `PointerTypeNode *`.
2132Demangler::demangleMemberPointerType(std::string_view &MangledName) {
2133 PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
2134
2135 std::tie(Pointer->Quals, Pointer->Affinity) =
2136 demanglePointerCVQualifiers(MangledName);
// NOTE(review): the listing skips line 2137 here, so one statement may have
// been lost in extraction — verify against upstream.
2138
2139 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
2140 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
2141
2142 // isMemberPointer() only returns true if there is at least one character
2143 // after the qualifiers.
2144 if (consumeFront(MangledName, "8")) {
// Member function pointer: the signature is demangled with this-qualifiers.
2145 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2146 Pointer->Pointee = demangleFunctionType(MangledName, true);
2147 } else {
// Data member pointer: the pointee's qualifiers come before the class name,
// so the pointee is demangled with QualifierMangleMode::Drop and the
// qualifiers are assigned afterwards.
2148 Qualifiers PointeeQuals = Q_None;
2149 bool IsMember = false;
2150 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
2151 assert(IsMember || Error);
2152 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
2153
2154 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
2155 if (Pointer->Pointee)
2156 Pointer->Pointee->Quals = PointeeQuals;
2157 }
2158
2159 return Pointer;
2160}
2161
2163Demangler::demanglePointerExtQualifiers(std::string_view &MangledName) {
2164 Qualifiers Quals = Q_None;
2165 if (consumeFront(MangledName, 'E'))
2166 Quals = Qualifiers(Quals | Q_Pointer64);
2167 if (consumeFront(MangledName, 'I'))
2168 Quals = Qualifiers(Quals | Q_Restrict);
2169 if (consumeFront(MangledName, 'F'))
2170 Quals = Qualifiers(Quals | Q_Unaligned);
2171
2172 return Quals;
2173}
2174
2175std::optional<PointerAuthQualifierNode::ArgArray>
2176Demangler::demanglePointerAuthQualifier(std::string_view &MangledName) {
2177 if (!consumeFront(MangledName, "__ptrauth"))
2178 return std::nullopt;
2179
2180 constexpr unsigned NumArgs = PointerAuthQualifierNode::NumArgs;
2182
2183 for (unsigned I = 0; I < NumArgs; ++I) {
2184 bool IsNegative = false;
2185 uint64_t Value = 0;
2186 std::tie(Value, IsNegative) = demangleNumber(MangledName);
2187 if (IsNegative)
2188 return std::nullopt;
2189
2190 Array[I] = Value;
2191 }
2192
2193 return Array;
2194}
2195
2197Demangler::createPointerAuthQualifier(std::string_view &MangledName) {
2198 constexpr unsigned NumArgs = PointerAuthQualifierNode::NumArgs;
2199 std::optional<PointerAuthQualifierNode::ArgArray> Vals =
2200 demanglePointerAuthQualifier(MangledName);
2201
2202 if (!Vals)
2203 return nullptr;
2204
2205 PointerAuthQualifierNode *PtrAuthQual =
2206 Arena.alloc<PointerAuthQualifierNode>();
2207 NodeArrayNode *Array = Arena.alloc<NodeArrayNode>();
2208 PtrAuthQual->Components = Array;
2209 Array->Count = NumArgs;
2210 Array->Nodes = Arena.allocArray<Node *>(NumArgs);
2211
2212 for (unsigned I = 0; I < NumArgs; ++I)
2213 Array->Nodes[I] = Arena.alloc<IntegerLiteralNode>((*Vals)[I], false);
2214
2215 return PtrAuthQual;
2216}
2217
2218ArrayTypeNode *Demangler::demangleArrayType(std::string_view &MangledName) {
2219 assert(MangledName.front() == 'Y');
2220 MangledName.remove_prefix(1);
2221
2222 uint64_t Rank = 0;
2223 bool IsNegative = false;
2224 std::tie(Rank, IsNegative) = demangleNumber(MangledName);
2225 if (IsNegative || Rank == 0) {
2226 Error = true;
2227 return nullptr;
2228 }
2229
2230 ArrayTypeNode *ATy = Arena.alloc<ArrayTypeNode>();
2231 NodeList *Head = Arena.alloc<NodeList>();
2232 NodeList *Tail = Head;
2233
2234 for (uint64_t I = 0; I < Rank; ++I) {
2235 uint64_t D = 0;
2236 std::tie(D, IsNegative) = demangleNumber(MangledName);
2237 if (Error || IsNegative) {
2238 Error = true;
2239 return nullptr;
2240 }
2241 Tail->N = Arena.alloc<IntegerLiteralNode>(D, IsNegative);
2242 if (I + 1 < Rank) {
2243 Tail->Next = Arena.alloc<NodeList>();
2244 Tail = Tail->Next;
2245 }
2246 }
2247 ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank);
2248
2249 if (consumeFront(MangledName, "$$C")) {
2250 bool IsMember = false;
2251 std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName);
2252 if (IsMember) {
2253 Error = true;
2254 return nullptr;
2255 }
2256 }
2257
2258 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
2259 return ATy;
2260}
2261
2262// Reads a function's parameters.
2264Demangler::demangleFunctionParameterList(std::string_view &MangledName,
2265 bool &IsVariadic) {
2266 // Empty parameter list.
2267 if (consumeFront(MangledName, 'X'))
2268 return nullptr;
2269
2270 NodeList *Head = Arena.alloc<NodeList>();
2271 NodeList **Current = &Head;
2272 size_t Count = 0;
2273 while (!Error && !llvm::itanium_demangle::starts_with(MangledName, '@') &&
2274 !llvm::itanium_demangle::starts_with(MangledName, 'Z')) {
2275 ++Count;
2276
2277 if (startsWithDigit(MangledName)) {
2278 size_t N = MangledName[0] - '0';
2279 if (N >= Backrefs.FunctionParamCount) {
2280 Error = true;
2281 return nullptr;
2282 }
2283 MangledName.remove_prefix(1);
2284
2285 *Current = Arena.alloc<NodeList>();
2286 (*Current)->N = Backrefs.FunctionParams[N];
2287 Current = &(*Current)->Next;
2288 continue;
2289 }
2290
2291 size_t OldSize = MangledName.size();
2292
2293 *Current = Arena.alloc<NodeList>();
2294 TypeNode *TN = demangleType(MangledName, QualifierMangleMode::Drop);
2295 if (!TN || Error)
2296 return nullptr;
2297
2298 (*Current)->N = TN;
2299
2300 size_t CharsConsumed = OldSize - MangledName.size();
2301 assert(CharsConsumed != 0);
2302
2303 // Single-letter types are ignored for backreferences because memorizing
2304 // them doesn't save anything.
2305 if (Backrefs.FunctionParamCount <= 9 && CharsConsumed > 1)
2306 Backrefs.FunctionParams[Backrefs.FunctionParamCount++] = TN;
2307
2308 Current = &(*Current)->Next;
2309 }
2310
2311 if (Error)
2312 return nullptr;
2313
2314 NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count);
2315 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter
2316 // list or '@' (non variadic). Careful not to consume "@Z", as in that case
2317 // the following Z could be a throw specifier.
2318 if (consumeFront(MangledName, '@'))
2319 return NA;
2320
2321 if (consumeFront(MangledName, 'Z')) {
2322 IsVariadic = true;
2323 return NA;
2324 }
2325
2327}
2328
2330Demangler::demangleTemplateParameterList(std::string_view &MangledName) {
2331 NodeList *Head = nullptr;
2332 NodeList **Current = &Head;
2333 size_t Count = 0;
2334
2335 while (!llvm::itanium_demangle::starts_with(MangledName, '@')) {
2336 if (consumeFront(MangledName, "$S") || consumeFront(MangledName, "$$V") ||
2337 consumeFront(MangledName, "$$$V") || consumeFront(MangledName, "$$Z")) {
2338 // parameter pack separator
2339 continue;
2340 }
2341
2342 ++Count;
2343
2344 // Template parameter lists don't participate in back-referencing.
2345 *Current = Arena.alloc<NodeList>();
2346
2347 NodeList &TP = **Current;
2348
2349 // <auto-nttp> ::= $ M <type> <nttp>
2350 const bool IsAutoNTTP = consumeFront(MangledName, "$M");
2351 if (IsAutoNTTP) {
2352 // The deduced type of the auto NTTP parameter isn't printed so
2353 // we want to ignore the AST created from demangling the type.
2354 //
2355 // TODO: Avoid the extra allocations to the bump allocator in this case.
2356 (void)demangleType(MangledName, QualifierMangleMode::Drop);
2357 if (Error)
2358 return nullptr;
2359 }
2360
2361 TemplateParameterReferenceNode *TPRN = nullptr;
2362 if (consumeFront(MangledName, "$$Y")) {
2363 // Template alias
2364 TP.N = demangleFullyQualifiedTypeName(MangledName);
2365 } else if (consumeFront(MangledName, "$$B")) {
2366 // Array
2367 TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
2368 } else if (consumeFront(MangledName, "$$C")) {
2369 // Type has qualifiers.
2370 TP.N = demangleType(MangledName, QualifierMangleMode::Mangle);
2371 } else if (startsWith(MangledName, "$1", "1", !IsAutoNTTP) ||
2372 startsWith(MangledName, "$H", "H", !IsAutoNTTP) ||
2373 startsWith(MangledName, "$I", "I", !IsAutoNTTP) ||
2374 startsWith(MangledName, "$J", "J", !IsAutoNTTP)) {
2375 // Pointer to member
2376 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2377 TPRN->IsMemberPointer = true;
2378
2379 if (!IsAutoNTTP)
2380 MangledName.remove_prefix(1); // Remove leading '$'
2381
2382 // 1 - single inheritance <name>
2383 // H - multiple inheritance <name> <number>
2384 // I - virtual inheritance <name> <number> <number>
2385 // J - unspecified inheritance <name> <number> <number> <number>
2386 char InheritanceSpecifier = MangledName.front();
2387 MangledName.remove_prefix(1);
2388 SymbolNode *S = nullptr;
2389 if (llvm::itanium_demangle::starts_with(MangledName, '?')) {
2390 S = parse(MangledName);
2391 if (Error || !S->Name) {
2392 Error = true;
2393 return nullptr;
2394 }
2395 memorizeIdentifier(S->Name->getUnqualifiedIdentifier());
2396 }
2397
2398 switch (InheritanceSpecifier) {
2399 case 'J':
2400 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2401 demangleSigned(MangledName);
2403 case 'I':
2404 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2405 demangleSigned(MangledName);
2407 case 'H':
2408 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2409 demangleSigned(MangledName);
2411 case '1':
2412 break;
2413 default:
2415 }
2417 TPRN->Symbol = S;
2418 } else if (llvm::itanium_demangle::starts_with(MangledName, "$E?")) {
2419 consumeFront(MangledName, "$E");
2420 // Reference to symbol
2421 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2422 TPRN->Symbol = parse(MangledName);
2424 } else if (startsWith(MangledName, "$F", "F", !IsAutoNTTP) ||
2425 startsWith(MangledName, "$G", "G", !IsAutoNTTP)) {
2426 TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
2427
2428 // Data member pointer.
2429 if (!IsAutoNTTP)
2430 MangledName.remove_prefix(1); // Remove leading '$'
2431 char InheritanceSpecifier = MangledName.front();
2432 MangledName.remove_prefix(1);
2433
2434 switch (InheritanceSpecifier) {
2435 case 'G':
2436 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2437 demangleSigned(MangledName);
2439 case 'F':
2440 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2441 demangleSigned(MangledName);
2442 TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
2443 demangleSigned(MangledName);
2444 break;
2445 default:
2447 }
2448 TPRN->IsMemberPointer = true;
2449
2450 } else if (consumeFront(MangledName, "$0", "0", !IsAutoNTTP)) {
2451 // Integral non-type template parameter
2452 bool IsNegative = false;
2453 uint64_t Value = 0;
2454 std::tie(Value, IsNegative) = demangleNumber(MangledName);
2455
2456 TP.N = Arena.alloc<IntegerLiteralNode>(Value, IsNegative);
2457 } else {
2458 TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
2459 }
2460 if (Error)
2461 return nullptr;
2462
2463 Current = &TP.Next;
2464 }
2465
2466 // The loop above returns nullptr on Error.
2467 assert(!Error);
2468
2469 // Template parameter lists cannot be variadic, so it can only be terminated
2470 // by @ (as opposed to 'Z' in the function parameter case).
2471 assert(llvm::itanium_demangle::starts_with(
2472 MangledName, '@')); // The above loop exits only on '@'.
2473 consumeFront(MangledName, '@');
2474 return nodeListToNodeArray(Arena, Head, Count);
2475}
2476
2477void Demangler::dumpBackReferences() {
2478 std::printf("%d function parameter backreferences\n",
2479 (int)Backrefs.FunctionParamCount);
2480
2481 // Create an output stream so we can render each type.
2482 OutputBuffer OB;
2483 for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) {
2484 OB.setCurrentPosition(0);
2485
2486 TypeNode *T = Backrefs.FunctionParams[I];
2487 T->output(OB, OF_Default);
2488
2489 std::string_view B = OB;
2490 std::printf(" [%d] - %.*s\n", (int)I, (int)B.size(), B.data());
2491 }
2492 std::free(OB.getBuffer());
2493
2494 if (Backrefs.FunctionParamCount > 0)
2495 std::printf("\n");
2496 std::printf("%d name backreferences\n", (int)Backrefs.NamesCount);
2497 for (size_t I = 0; I < Backrefs.NamesCount; ++I) {
2498 std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(),
2499 Backrefs.Names[I]->Name.data());
2500 }
2501 if (Backrefs.NamesCount > 0)
2502 std::printf("\n");
2503}
2504
2505std::optional<size_t>
2506llvm::getArm64ECInsertionPointInMangledName(std::string_view MangledName) {
2507 std::string_view ProcessedName{MangledName};
2508
2509 // We only support this for MSVC-style C++ symbols.
2510 if (!consumeFront(ProcessedName, '?'))
2511 return std::nullopt;
2512
2513 // The insertion point is just after the name of the symbol, so parse that to
2514 // remove it from the processed name.
2515 Demangler D;
2516 D.demangleFullyQualifiedSymbolName(ProcessedName);
2517 if (D.Error)
2518 return std::nullopt;
2519
2520 return MangledName.length() - ProcessedName.length();
2521}
2522
2523char *llvm::microsoftDemangle(std::string_view MangledName, size_t *NMangled,
2524 int *Status, MSDemangleFlags Flags) {
2525 Demangler D;
2526
2527 std::string_view Name{MangledName};
2528 SymbolNode *AST = D.parse(Name);
2529 if (!D.Error && NMangled)
2530 *NMangled = MangledName.size() - Name.size();
2531
2532 if (Flags & MSDF_DumpBackrefs)
2533 D.dumpBackReferences();
2534
2536 if (Flags & MSDF_NoCallingConvention)
2538 if (Flags & MSDF_NoAccessSpecifier)
2540 if (Flags & MSDF_NoReturnType)
2541 OF = OutputFlags(OF | OF_NoReturnType);
2542 if (Flags & MSDF_NoMemberType)
2543 OF = OutputFlags(OF | OF_NoMemberType);
2544 if (Flags & MSDF_NoVariableType)
2545 OF = OutputFlags(OF | OF_NoVariableType);
2546
2547 int InternalStatus = demangle_success;
2548 char *Buf;
2549 if (D.Error)
2550 InternalStatus = demangle_invalid_mangled_name;
2551 else {
2552 OutputBuffer OB;
2553 AST->output(OB, OF);
2554 OB += '\0';
2555 Buf = OB.getBuffer();
2556 }
2557
2558 if (Status)
2559 *Status = InternalStatus;
2560 return InternalStatus == demangle_success ? Buf : nullptr;
2561}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEMANGLE_FALLTHROUGH
#define DEMANGLE_UNREACHABLE
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
static bool startsWithLocalScopePattern(std::string_view S)
static bool isArrayType(std::string_view S)
static unsigned countEmbeddedNulls(const uint8_t *StringBytes, unsigned Length)
static bool startsWithDigit(std::string_view S)
static QualifiedNameNode * synthesizeQualifiedName(ArenaAllocator &Arena, IdentifierNode *Identifier)
static void outputEscapedChar(OutputBuffer &OB, unsigned C)
static bool isCustomType(std::string_view S)
static void outputHex(OutputBuffer &OB, unsigned C)
static std::pair< Qualifiers, PointerAffinity > demanglePointerCVQualifiers(std::string_view &MangledName)
static VariableSymbolNode * synthesizeVariable(ArenaAllocator &Arena, TypeNode *Type, std::string_view VariableName)
static unsigned decodeMultiByteChar(const uint8_t *StringBytes, unsigned CharIndex, unsigned CharBytes)
static void writeHexDigit(char *Buffer, uint8_t Digit)
static FunctionRefQualifier demangleFunctionRefQualifier(std::string_view &MangledName)
static bool isRebasedHexDigit(char C)
static NodeArrayNode * nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head, size_t Count)
static uint8_t rebasedHexDigitToNumber(char C)
static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length)
static NamedIdentifierNode * synthesizeNamedIdentifier(ArenaAllocator &Arena, std::string_view Name)
static bool startsWith(std::string_view S, std::string_view PrefixA, std::string_view PrefixB, bool A)
static bool consumeFront(std::string_view &S, char C)
static bool isFunctionType(std::string_view S)
static bool isPointerType(std::string_view S)
static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars, uint64_t NumBytes)
static SpecialIntrinsicKind consumeSpecialIntrinsicKind(std::string_view &MangledName)
static bool isTagType(std::string_view S)
#define T
#define CH(x, y, z)
Definition SHA256.cpp:34
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
T * alloc(Args &&... ConstructorArgs)
DEMANGLE_ABI SymbolNode * parse(std::string_view &MangledName)
#define INT64_MAX
Definition DataTypes.h:71
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ OB
OB - OneByte - Set if this instruction has a one byte opcode.
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
SmallVector< Node, 4 > NodeList
Definition RDFGraph.h:550
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
@ Length
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ demangle_success
Definition Demangle.h:31
@ demangle_invalid_mangled_name
Definition Demangle.h:29
DEMANGLE_ABI std::optional< size_t > getArm64ECInsertionPointInMangledName(std::string_view MangledName)
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
DEMANGLE_ABI char * microsoftDemangle(std::string_view mangled_name, size_t *n_read, int *status, MSDemangleFlags Flags=MSDF_None)
Demangles the Microsoft symbol pointed at by mangled_name and returns it.
MSDemangleFlags
Definition Demangle.h:40
@ MSDF_NoReturnType
Definition Demangle.h:45
@ MSDF_DumpBackrefs
Definition Demangle.h:42
@ MSDF_NoMemberType
Definition Demangle.h:46
@ MSDF_NoVariableType
Definition Demangle.h:47
@ MSDF_NoCallingConvention
Definition Demangle.h:44
@ MSDF_NoAccessSpecifier
Definition Demangle.h:43
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
NodeList * Next
NamedIdentifierNode * Names[Max]
void output(OutputBuffer &OB, OutputFlags Flags) const override