114 MAI.useAtForSpecifier();
115 LexMotorolaIntegers = MAI.shouldUseMotorolaIntegers();
121 bool EndStatementAtEOF) {
127 CurPtr = CurBuf.begin();
130 this->EndStatementAtEOF = EndStatementAtEOF;
135AsmToken AsmLexer::ReturnError(
const char *
Loc,
const std::string &Msg) {
141int AsmLexer::getNextChar() {
142 if (CurPtr == CurBuf.
end())
144 return (
unsigned char)*CurPtr++;
147int AsmLexer::peekNextChar() {
148 if (CurPtr == CurBuf.end())
150 return (
unsigned char)*CurPtr;
156AsmToken AsmLexer::LexFloatLiteral() {
161 if (*CurPtr ==
'-' || *CurPtr ==
'+')
162 return ReturnError(CurPtr,
"invalid sign in float literal");
165 if ((*CurPtr ==
'e' || *CurPtr ==
'E')) {
168 if (*CurPtr ==
'-' || *CurPtr ==
'+')
176 StringRef(TokStart, CurPtr - TokStart));
185AsmToken AsmLexer::LexHexFloatLiteral(
bool NoIntDigits) {
186 assert((*CurPtr ==
'p' || *CurPtr ==
'P' || *CurPtr ==
'.') &&
187 "unexpected parse state in floating hex");
188 bool NoFracDigits =
true;
191 if (*CurPtr ==
'.') {
194 const char *FracStart = CurPtr;
198 NoFracDigits = CurPtr == FracStart;
201 if (NoIntDigits && NoFracDigits)
202 return ReturnError(TokStart,
"invalid hexadecimal floating-point constant: "
203 "expected at least one significand digit");
206 if (*CurPtr !=
'p' && *CurPtr !=
'P')
207 return ReturnError(TokStart,
"invalid hexadecimal floating-point constant: "
208 "expected exponent part 'p'");
211 if (*CurPtr ==
'+' || *CurPtr ==
'-')
215 const char *ExpStart = CurPtr;
219 if (CurPtr == ExpStart)
220 return ReturnError(TokStart,
"invalid hexadecimal floating-point constant: "
221 "expected at least one exponent digit");
223 return AsmToken(
AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
228 return isAlnum(
C) ||
C ==
'_' ||
C ==
'$' ||
C ==
'.' ||
C ==
'?' ||
229 (AllowAt &&
C ==
'@') || (AllowHash &&
C ==
'#');
234 if (CurPtr[-1] ==
'.' &&
isDigit(*CurPtr)) {
240 AllowHashInIdentifier) ||
241 *CurPtr ==
'e' || *CurPtr ==
'E')
242 return LexFloatLiteral();
245 while (
isIdentifierChar(*CurPtr, AllowAtInIdentifier, AllowHashInIdentifier))
249 if (CurPtr == TokStart+1 && TokStart[0] ==
'.')
259 if (!MAI.shouldAllowAdditionalComments()) {
260 IsAtStartOfStatement =
false;
266 IsAtStartOfStatement =
false;
270 return LexLineComment();
272 IsAtStartOfStatement =
false;
278 const char *CommentTextStart = CurPtr;
279 while (CurPtr != CurBuf.end()) {
286 if (CommentConsumer) {
287 CommentConsumer->HandleComment(
289 StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
293 StringRef(TokStart, CurPtr - TokStart));
296 return ReturnError(TokStart,
"unterminated comment");
301AsmToken AsmLexer::LexLineComment() {
306 const char *CommentTextStart = CurPtr;
307 int CurChar = getNextChar();
308 while (CurChar !=
'\n' && CurChar !=
'\r' && CurChar != EOF)
309 CurChar = getNextChar();
310 const char *NewlinePtr = CurPtr;
311 if (CurChar ==
'\r' && CurPtr != CurBuf.end() && *CurPtr ==
'\n')
315 if (CommentConsumer) {
316 CommentConsumer->HandleComment(
318 StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart));
321 IsAtStartOfLine =
true;
323 if (IsAtStartOfStatement)
325 StringRef(TokStart, CurPtr - TokStart));
326 IsAtStartOfStatement =
true;
329 StringRef(TokStart, CurPtr - 1 - TokStart));
334 if (CurPtr[0] ==
'U' || CurPtr[0] ==
'u')
336 if (CurPtr[0] ==
'L' || CurPtr[0] ==
'l')
338 if (CurPtr[0] ==
'L' || CurPtr[0] ==
'l')
346 const char *FirstNonDec =
nullptr;
347 const char *LookAhead = CurPtr;
353 FirstNonDec = LookAhead;
362 bool isHex = LexHex && (*LookAhead ==
'h' || *LookAhead ==
'H');
363 CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
369static const char *
findLastDigit(
const char *CurPtr,
unsigned DefaultRadix) {
377 if (
Value.isIntN(64))
391 return "hexadecimal";
393 return "base-" + std::to_string(Radix);
409 if (LexMasmIntegers && isdigit(CurPtr[-1])) {
410 const char *FirstNonBinary =
411 (CurPtr[-1] !=
'0' && CurPtr[-1] !=
'1') ? CurPtr - 1 : nullptr;
412 const char *FirstNonDecimal =
413 (CurPtr[-1] <
'0' || CurPtr[-1] >
'9') ? CurPtr - 1 : nullptr;
414 const char *OldCurPtr = CurPtr;
418 if (!FirstNonDecimal) {
419 FirstNonDecimal = CurPtr;
430 if (!FirstNonBinary) {
431 FirstNonBinary = CurPtr;
440 if (*CurPtr ==
'.') {
444 return LexFloatLiteral();
447 if (LexMasmHexFloats && (*CurPtr ==
'r' || *CurPtr ==
'R')) {
449 return AsmToken(
AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
453 if (*CurPtr ==
'h' || *CurPtr ==
'H') {
457 }
else if (*CurPtr ==
't' || *CurPtr ==
'T') {
461 }
else if (*CurPtr ==
'o' || *CurPtr ==
'O' || *CurPtr ==
'q' ||
466 }
else if (*CurPtr ==
'y' || *CurPtr ==
'Y') {
470 }
else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
472 (*FirstNonDecimal ==
'd' || *FirstNonDecimal ==
'D')) {
474 }
else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
476 (*FirstNonBinary ==
'b' || *FirstNonBinary ==
'B')) {
481 StringRef
Result(TokStart, CurPtr - TokStart);
482 APInt
Value(128, 0,
true);
485 return ReturnError(TokStart,
"invalid " +
radixName(Radix) +
" number");
499 if (LexMasmIntegers && UseMasmDefaultRadix) {
501 StringRef
Result(TokStart, CurPtr - TokStart);
503 APInt
Value(128, 0,
true);
505 return ReturnError(TokStart,
506 "invalid " +
radixName(DefaultRadix) +
" number");
513 if (LexMotorolaIntegers && CurPtr[-1] ==
'$') {
514 const char *NumStart = CurPtr;
519 if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(16, Result))
520 return ReturnError(TokStart,
"invalid hexadecimal number");
522 return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
526 if (LexMotorolaIntegers && CurPtr[-1] ==
'%') {
527 const char *NumStart = CurPtr;
528 while (*CurPtr ==
'0' || *CurPtr ==
'1')
532 if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(2, Result))
533 return ReturnError(TokStart,
"invalid binary number");
535 return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
542 if (LexHLASMIntegers || CurPtr[-1] !=
'0' || CurPtr[0] ==
'.') {
545 if (!LexHLASMIntegers) {
546 bool IsHex = Radix == 16;
548 if (!IsHex && (*CurPtr ==
'.' || *CurPtr ==
'e' || *CurPtr ==
'E')) {
551 return LexFloatLiteral();
555 StringRef
Result(TokStart, CurPtr - TokStart);
557 APInt
Value(128, 0,
true);
559 return ReturnError(TokStart,
"invalid " +
radixName(Radix) +
" number");
561 if (!LexHLASMIntegers)
569 if (!LexMasmIntegers && ((*CurPtr ==
'b') || (*CurPtr ==
'B'))) {
574 StringRef
Result(TokStart, CurPtr - TokStart);
577 const char *NumStart = CurPtr;
578 while (CurPtr[0] ==
'0' || CurPtr[0] ==
'1')
582 if (CurPtr == NumStart)
583 return ReturnError(TokStart,
"invalid binary number");
585 StringRef
Result(TokStart, CurPtr - TokStart);
587 APInt
Value(128, 0,
true);
589 return ReturnError(TokStart,
"invalid binary number");
598 if ((*CurPtr ==
'x') || (*CurPtr ==
'X')) {
600 const char *NumStart = CurPtr;
606 if (CurPtr[0] ==
'.' || CurPtr[0] ==
'p' || CurPtr[0] ==
'P')
607 return LexHexFloatLiteral(NumStart == CurPtr);
610 if (CurPtr == NumStart)
611 return ReturnError(CurPtr-2,
"invalid hexadecimal number");
614 if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
615 return ReturnError(TokStart,
"invalid hexadecimal number");
618 if (LexMasmIntegers && (*CurPtr ==
'h' || *CurPtr ==
'H'))
625 return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
629 APInt
Value(128, 0,
true);
631 StringRef
Result(TokStart, CurPtr - TokStart);
633 return ReturnError(TokStart,
"invalid " +
radixName(Radix) +
" number");
647AsmToken AsmLexer::LexSingleQuote() {
648 int CurChar = getNextChar();
651 return ReturnError(TokStart,
"invalid usage of character literals");
653 if (LexMasmStrings) {
654 while (CurChar != EOF) {
655 if (CurChar !=
'\'') {
656 CurChar = getNextChar();
657 }
else if (peekNextChar() ==
'\'') {
661 CurChar = getNextChar();
667 return ReturnError(TokStart,
"unterminated string constant");
672 CurChar = getNextChar();
675 return ReturnError(TokStart,
"unterminated single quote");
677 CurChar = getNextChar();
680 return ReturnError(TokStart,
"single quote way too long");
684 StringRef Res = StringRef(TokStart,CurPtr - TokStart);
688 char theChar = Res[2];
690 default:
Value = theChar;
break;
691 case '\'':
Value =
'\'';
break;
692 case 't':
Value =
'\t';
break;
693 case 'n':
Value =
'\n';
break;
694 case 'b':
Value =
'\b';
break;
695 case 'f':
Value =
'\f';
break;
696 case 'r':
Value =
'\r';
break;
706 int CurChar = getNextChar();
708 return ReturnError(TokStart,
"invalid usage of string literals");
710 if (LexMasmStrings) {
711 while (CurChar != EOF) {
712 if (CurChar !=
'"') {
713 CurChar = getNextChar();
714 }
else if (peekNextChar() ==
'"') {
718 CurChar = getNextChar();
724 return ReturnError(TokStart,
"unterminated string constant");
728 while (CurChar !=
'"') {
729 if (CurChar ==
'\\') {
731 CurChar = getNextChar();
735 return ReturnError(TokStart,
"unterminated string constant");
737 CurChar = getNextChar();
746 while (!isAtStartOfComment(CurPtr) &&
747 !isAtStatementSeparator(CurPtr) &&
748 *CurPtr !=
'\n' && *CurPtr !=
'\r' && CurPtr != CurBuf.end()) {
751 return StringRef(TokStart, CurPtr-TokStart);
757 while (*CurPtr !=
'\n' && *CurPtr !=
'\r' && CurPtr != CurBuf.
end()) {
760 return StringRef(TokStart, CurPtr-TokStart);
764 bool ShouldSkipSpace) {
771 std::string SavedErr =
getErr();
775 for (ReadCount = 0; ReadCount < Buf.
size(); ++ReadCount) {
778 Buf[ReadCount] = Token;
786 SetError(SavedErrLoc, SavedErr);
790bool AsmLexer::isAtStartOfComment(
const char *
Ptr) {
791 if (MAI.
isHLASM() && !IsAtStartOfStatement)
796 if (CommentString.
size() == 1)
797 return CommentString[0] ==
Ptr[0];
800 if (CommentString[1] ==
'#')
801 return CommentString[0] ==
Ptr[0];
803 return strncmp(
Ptr, CommentString.
data(), CommentString.
size()) == 0;
806bool AsmLexer::isAtStatementSeparator(
const char *
Ptr) {
814 int CurChar = getNextChar();
816 if (!IsPeeking && CurChar ==
'#' && IsAtStartOfStatement) {
819 AsmToken TokenBuf[2];
826 StringRef s = LexUntilEndOfLine();
832 if (MAI.shouldAllowAdditionalComments())
833 return LexLineComment();
836 if (isAtStartOfComment(TokStart)) {
837 StringRef CommentString = MAI.getCommentString();
841 if (CommentString.
size() > 1 &&
842 StringRef(TokStart, CommentString.
size()) == CommentString) {
843 CurPtr += CommentString.
size() - 1;
845 return LexLineComment();
848 if (isAtStatementSeparator(TokStart)) {
849 CurPtr += strlen(MAI.getSeparatorString()) - 1;
850 IsAtStartOfLine =
true;
851 IsAtStartOfStatement =
true;
853 StringRef(TokStart, strlen(MAI.getSeparatorString())));
858 if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) {
859 IsAtStartOfLine =
true;
860 IsAtStartOfStatement =
true;
863 IsAtStartOfLine =
false;
864 bool OldIsAtStartOfStatement = IsAtStartOfStatement;
865 IsAtStartOfStatement =
false;
872 if (isalpha(CurChar) || CurChar ==
'_' || CurChar ==
'.')
873 return LexIdentifier();
876 return ReturnError(TokStart,
"invalid character in input");
878 if (EndStatementAtEOF) {
879 IsAtStartOfLine =
true;
880 IsAtStartOfStatement =
true;
886 IsAtStartOfStatement = OldIsAtStartOfStatement;
887 while (*CurPtr ==
' ' || *CurPtr ==
'\t')
892 return AsmToken(
AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
894 IsAtStartOfLine =
true;
895 IsAtStartOfStatement =
true;
897 if (CurPtr != CurBuf.end() && *CurPtr ==
'\n')
900 StringRef(TokStart, CurPtr - TokStart));
903 IsAtStartOfLine =
true;
904 IsAtStartOfStatement =
true;
907 case '+':
return AsmToken(
AsmToken::Plus, StringRef(TokStart, 1));
915 case '*':
return AsmToken(
AsmToken::Star, StringRef(TokStart, 1));
918 if (LexMotorolaIntegers &&
isHexDigit(*CurPtr))
920 if (MAI.doesAllowDollarAtStartOfIdentifier())
921 return LexIdentifier();
925 if (MAI.doesAllowAtAtStartOfIdentifier())
926 return LexIdentifier();
930 return LexIdentifier();
933 if (MAI.doesAllowQuestionAtStartOfIdentifier())
934 return LexIdentifier();
938 if (*CurPtr ==
'=') {
944 if (*CurPtr ==
'>') {
950 if (*CurPtr ==
'|') {
957 if (*CurPtr ==
'&') {
963 if (*CurPtr ==
'=') {
969 if (LexMotorolaIntegers && (*CurPtr ==
'0' || *CurPtr ==
'1')) {
974 IsAtStartOfStatement = OldIsAtStartOfStatement;
976 case '\'':
return LexSingleQuote();
977 case '"':
return LexQuote();
978 case '0':
case '1':
case '2':
case '3':
case '4':
979 case '5':
case '6':
case '7':
case '8':
case '9':
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static std::string radixName(unsigned Radix)
static void SkipIgnoredIntegerSuffix(const char *&CurPtr)
static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, bool LexHex)
static AsmToken intToken(StringRef Ref, APInt &Value)
static const char * findLastDigit(const char *CurPtr, unsigned DefaultRadix)
static bool isIdentifierChar(char C)
Return true if the given character satisfies the following regular expression: [-a-zA-Z$....
This file provides utility classes that use RAII to save and restore values.
Class for arbitrary precision integers.
size_t size() const
size - Get the array size.
LLVM_ABI AsmLexer(const MCAsmInfo &MAI)
void UnLex(AsmToken const &Token)
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
SMLoc getErrLoc()
Get the current error location.
const std::string & getErr()
Get the current error string.
LLVM_ABI StringRef LexUntilEndOfStatement()
LLVM_ABI void setBuffer(StringRef Buf, const char *ptr=nullptr, bool EndStatementAtEOF=true)
LLVM_ABI size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true)
Look ahead an arbitrary number of tokens.
Target independent representation for an assembler token.
LLVM_ABI SMLoc getLoc() const
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
bool is(TokenKind K) const
LLVM_ABI SMLoc getEndLoc() const
LLVM_ABI void dump(raw_ostream &OS) const
LLVM_ABI SMRange getLocRange() const
This class is intended to be used as a base class for asm properties and features specific to the tar...
StringRef getCommentString() const
const char * getSeparatorString() const
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
Represents a range in source code.
StringRef - Represent a constant reference to a string, i.e.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & write_escaped(StringRef Str, bool UseHexEscapes=false)
Output Str, turning '\', '\t', ' ', '"', and anything that doesn't satisfy llvm::isPrint into an esca...
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
bool isAlnum(char C)
Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classifie...
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
@ Ref
The access may reference the value stored in memory.
bool isHexDigit(char C)
Checks if character C is a hexadecimal numeric character.
A utility class that uses RAII to save and restore the value of a variable.