2
// <copyright see="prj:///doc/copyright.txt"/>
3
// <license see="prj:///doc/license.txt"/>
4
// <owner name="Andrea Paatz" email="andrea@icsharpcode.net"/>
5
// <version>$Revision: 4482 $</version>
9
using System.Globalization;
13
namespace ICSharpCode.OldNRefactory.Parser.VB
15
internal sealed class Lexer : AbstractLexer
18
// Passed through to specialTracker.StartComment(...) and to the TagComment constructor
// in the comment-handling code. Initialized to false here; line-begin tracking is still a TODO.
bool isAtLineBegin = false; // TODO: handle line begin, if necessary
20
public Lexer(TextReader reader) : base(reader)
24
public override Token NextToken()
26
if (curToken == null) { // first call of NextToken()
28
specialTracker.InformToken(curToken.kind);
29
//Console.WriteLine("Tok:" + Tokens.GetTokenString(curToken.kind) + " --- " + curToken.val);
35
if (curToken.next == null) {
36
curToken.next = Next();
37
specialTracker.InformToken(curToken.next.kind);
40
curToken = curToken.next;
42
if (curToken.kind == Tokens.EOF && !(lastToken.kind == Tokens.EOL)) { // be sure that before EOF there is an EOL token
43
curToken = new Token(Tokens.EOL, curToken.col, curToken.line, "\n");
44
specialTracker.InformToken(curToken.kind);
45
curToken.next = new Token(Tokens.EOF, curToken.col, curToken.line, "\n");
46
specialTracker.InformToken(curToken.next.kind);
48
//Console.WriteLine("Tok:" + Tokens.GetTokenString(curToken.kind) + " --- " + curToken.val);
52
// Set to true in the identifier-reading code when the type character is '!' and the
// character peeked after it is '_', '[' or a letter.
// Next() checks this flag first: when it is set, Next() clears it and returns a
// Tokens.ExclamationMark token positioned at Col - 1.
// NOTE(review): presumably the '!' was a member-access operator rather than a type suffix — confirm.
bool misreadExclamationMarkAsTypeCharacter;
54
protected override Token Next()
56
if (misreadExclamationMarkAsTypeCharacter) {
57
misreadExclamationMarkAsTypeCharacter = false;
58
return new Token(Tokens.ExclamationMark, Col - 1, Line);
62
Location startLocation = new Location(Col, Line);
63
int nextChar = ReaderRead();
65
return new Token(Tokens.EOF);
66
char ch = (char)nextChar;
67
if (Char.IsWhiteSpace(ch)) {
68
if (HandleLineEnd(ch)) {
70
// second line end before getting to a token
71
// -> here was a blank line
72
specialTracker.AddEndOfLine(startLocation);
75
return new Token(Tokens.EOL, startLocation, new Location(Col, Line), null, null, LiteralFormat.None);
81
if (ReaderPeek() == -1) {
82
errors.Error(Line, Col, String.Format("No EOF expected after _"));
83
return new Token(Tokens.EOF);
85
if (!Char.IsWhiteSpace((char)ReaderPeek())) {
88
string s = ReadIdent('_');
90
return new Token(Tokens.Identifier, x, y, s);
92
ch = (char)ReaderRead();
94
bool oldLineEnd = lineEnd;
96
while (Char.IsWhiteSpace(ch)) {
97
if (HandleLineEnd(ch)) {
101
if (ReaderPeek() != -1) {
102
ch = (char)ReaderRead();
104
errors.Error(Line, Col, String.Format("No EOF expected after _"));
105
return new Token(Tokens.EOF);
109
errors.Error(Line, Col, String.Format("Return expected"));
111
lineEnd = oldLineEnd;
116
while (Char.IsWhiteSpace((char)ReaderPeek())) {
119
if (Char.IsDigit((char)ReaderPeek())) {
122
string s = ReadDate();
123
DateTime time = new DateTime(1, 1, 1, 0, 0, 0);
125
time = DateTime.Parse(s, System.Globalization.CultureInfo.InvariantCulture, DateTimeStyles.NoCurrentDateDefault);
126
} catch (Exception e) {
127
errors.Error(Line, Col, String.Format("Invalid date time {0}", e));
129
return new Token(Tokens.LiteralDate, x, y, s, time, LiteralFormat.DateTimeLiteral);
131
ReadPreprocessorDirective();
136
if (ch == '[') { // Identifier
138
if (ReaderPeek() == -1) {
139
errors.Error(Line, Col, String.Format("Identifier expected"));
141
ch = (char)ReaderRead();
142
if (ch == ']' || Char.IsWhiteSpace(ch)) {
143
errors.Error(Line, Col, String.Format("Identifier expected"));
147
string s = ReadIdent(ch);
148
if (ReaderPeek() == -1) {
149
errors.Error(Line, Col, String.Format("']' expected"));
151
ch = (char)ReaderRead();
153
errors.Error(Line, Col, String.Format("']' expected"));
155
return new Token(Tokens.Identifier, x, y, s);
157
if (Char.IsLetter(ch)) {
161
string s = ReadIdent(ch, out typeCharacter);
162
if (typeCharacter == '\0') {
163
int keyWordToken = Keywords.GetToken(s);
164
if (keyWordToken >= 0) {
165
// handle 'REM' comments
166
if (keyWordToken == Tokens.Rem) {
170
return new Token(Tokens.EOL, Col, Line, "\n");
176
return new Token(keyWordToken, x, y, s);
181
return new Token(Tokens.Identifier, x, y, s);
184
if (Char.IsDigit(ch)) {
186
return ReadDigit(ch, Col - 1);
190
if (ReaderPeek() == -1) {
191
return ReadOperator('&');
193
ch = (char)ReaderPeek();
194
if (Char.ToUpper(ch, CultureInfo.InvariantCulture) == 'H' || Char.ToUpper(ch, CultureInfo.InvariantCulture) == 'O') {
195
return ReadDigit('&', Col - 1);
197
return ReadOperator('&');
199
if (ch == '\'' || ch == '\u2018' || ch == '\u2019') {
205
return new Token(Tokens.EOL, x, y, "\n");
213
string s = ReadString();
214
if (ReaderPeek() != -1 && (ReaderPeek() == 'C' || ReaderPeek() == 'c')) {
217
errors.Error(Line, Col, String.Format("Chars can only have Length 1 "));
222
return new Token(Tokens.LiteralCharacter, x, y, '"' + s + "\"C", s[0], LiteralFormat.CharLiteral);
224
return new Token(Tokens.LiteralString, x, y, '"' + s + '"', s, LiteralFormat.StringLiteral);
226
Token token = ReadOperator(ch);
231
errors.Error(Line, Col, String.Format("Unknown char({0}) which can't be read", ch));
236
string ReadIdent(char ch)
239
return ReadIdent(ch, out typeCharacter);
242
string ReadIdent(char ch, out char typeCharacter)
244
typeCharacter = '\0';
249
while ((peek = ReaderPeek()) != -1 && (Char.IsLetterOrDigit(ch = (char)peek) || ch == '_')) {
251
sb.Append(ch.ToString());
254
return sb.ToString();
257
if ("%&@!#$".IndexOf((char)peek) != -1) {
258
typeCharacter = (char)peek;
260
if (typeCharacter == '!') {
262
if (peek != -1 && (peek == '_' || peek == '[' || char.IsLetter((char)peek))) {
263
misreadExclamationMarkAsTypeCharacter = true;
267
return sb.ToString();
272
return Char.ToUpper((char)ReaderPeek(), CultureInfo.InvariantCulture);
275
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Performance", "CA1818:DoNotConcatenateStringsInsideLoops")]
276
Token ReadDigit(char ch, int x)
289
bool issingle = false;
290
bool isdouble = false;
291
bool isdecimal = false;
293
if (ReaderPeek() == -1) {
295
errors.Error(Line, Col, String.Format("digit expected"));
297
return new Token(Tokens.LiteralInteger, x, y, sb.ToString() ,ch - '0', LiteralFormat.DecimalNumber);
300
if (Char.IsDigit((char)ReaderPeek())) {
301
isdouble = true; // double is default
302
if (ishex || isokt) {
303
errors.Error(Line, Col, String.Format("No hexadecimal or oktadecimal floating point values allowed"));
305
while (ReaderPeek() != -1 && Char.IsDigit((char)ReaderPeek())){ // read decimal digits beyond the dot
306
digit += (char)ReaderRead();
309
} else if (ch == '&' && PeekUpperChar() == 'H') {
310
const string hex = "0123456789ABCDEF";
311
sb.Append((char)ReaderRead()); // skip 'H'
312
while (ReaderPeek() != -1 && hex.IndexOf(PeekUpperChar()) != -1) {
313
ch = (char)ReaderRead();
315
digit += Char.ToUpper(ch, CultureInfo.InvariantCulture);
318
} else if (ReaderPeek() != -1 && ch == '&' && PeekUpperChar() == 'O') {
319
const string okt = "01234567";
320
sb.Append((char)ReaderRead()); // skip 'O'
321
while (ReaderPeek() != -1 && okt.IndexOf(PeekUpperChar()) != -1) {
322
ch = (char)ReaderRead();
324
digit += Char.ToUpper(ch, CultureInfo.InvariantCulture);
328
while (ReaderPeek() != -1 && Char.IsDigit((char)ReaderPeek())) {
329
ch = (char)ReaderRead();;
335
if (digit.Length == 0) {
336
errors.Error(Line, Col, String.Format("digit expected"));
337
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), 0, LiteralFormat.DecimalNumber);
340
if (ReaderPeek() != -1 && "%&SILU".IndexOf(PeekUpperChar()) != -1 || ishex || isokt) {
341
bool unsigned = false;
342
if (ReaderPeek() != -1) {
343
ch = (char)ReaderPeek();
345
ch = Char.ToUpper(ch, CultureInfo.InvariantCulture);
346
unsigned = ch == 'U';
348
ReaderRead(); // read the U
349
ch = (char)ReaderPeek();
351
ch = Char.ToUpper(ch, CultureInfo.InvariantCulture);
352
if (ch != 'I' && ch != 'L' && ch != 'S') {
353
errors.Error(Line, Col, "Invalid type character: U" + ch);
361
for (int i = 0; i < digit.Length; ++i) {
362
number = number * 8 + digit[i] - '0';
366
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), (ushort)number, LiteralFormat.OctalNumber);
368
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), (short)number, LiteralFormat.OctalNumber);
369
} else if (ch == '%' || ch == 'I') {
371
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), (uint)number, LiteralFormat.OctalNumber);
373
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), (int)number, LiteralFormat.OctalNumber);
374
} else if (ch == '&' || ch == 'L') {
376
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), (ulong)number, LiteralFormat.OctalNumber);
378
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), (long)number, LiteralFormat.OctalNumber);
380
if (number > uint.MaxValue) {
381
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), unchecked((long)number), LiteralFormat.OctalNumber);
383
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), unchecked((int)number), LiteralFormat.OctalNumber);
387
LiteralFormat literalFormat = ishex ? LiteralFormat.HexadecimalNumber : LiteralFormat.DecimalNumber;
391
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), UInt16.Parse(digit, ishex ? NumberStyles.HexNumber : NumberStyles.Number), literalFormat);
393
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), Int16.Parse(digit, ishex ? NumberStyles.HexNumber : NumberStyles.Number), literalFormat);
394
} else if (ch == '%' || ch == 'I') {
397
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), UInt32.Parse(digit, ishex ? NumberStyles.HexNumber : NumberStyles.Number), literalFormat);
399
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), Int32.Parse(digit, ishex ? NumberStyles.HexNumber : NumberStyles.Number), literalFormat);
400
} else if (ch == '&' || ch == 'L') {
403
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), UInt64.Parse(digit, ishex ? NumberStyles.HexNumber : NumberStyles.Number), literalFormat);
405
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), Int64.Parse(digit, ishex ? NumberStyles.HexNumber : NumberStyles.Number), literalFormat);
407
ulong number = UInt64.Parse(digit, NumberStyles.HexNumber);
408
if (number > uint.MaxValue) {
409
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), unchecked((long)number), literalFormat);
411
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), unchecked((int)number), literalFormat);
414
} catch (OverflowException ex) {
415
errors.Error(Line, Col, ex.Message);
416
return new Token(Tokens.LiteralInteger, x, y, sb.ToString(), 0, LiteralFormat.None);
419
Token nextToken = null; // if we accidentally read a 'dot': holds a Tokens.Dot token in that case and is assigned to token.next
420
if (!isdouble && ReaderPeek() == '.') { // read floating point number
422
if (ReaderPeek() != -1 && Char.IsDigit((char)ReaderPeek())) {
423
isdouble = true; // double is default
424
if (ishex || isokt) {
425
errors.Error(Line, Col, String.Format("No hexadecimal or oktadecimal floating point values allowed"));
428
while (ReaderPeek() != -1 && Char.IsDigit((char)ReaderPeek())){ // read decimal digits beyond the dot
429
digit += (char)ReaderRead();
432
nextToken = new Token(Tokens.Dot, Col - 1, Line);
436
if (ReaderPeek() != -1 && PeekUpperChar() == 'E') { // read exponent
438
digit += (char)ReaderRead();
439
if (ReaderPeek() != -1 && (ReaderPeek() == '-' || ReaderPeek() == '+')) {
440
digit += (char)ReaderRead();
442
while (ReaderPeek() != -1 && Char.IsDigit((char)ReaderPeek())) { // read exponent value
443
digit += (char)ReaderRead();
447
if (ReaderPeek() != -1) {
448
switch (PeekUpperChar()) {
469
return new Token(Tokens.LiteralSingle, x, y, sb.ToString(), Single.Parse(digit, CultureInfo.InvariantCulture), LiteralFormat.DecimalNumber);
472
return new Token(Tokens.LiteralDecimal, x, y, sb.ToString(), Decimal.Parse(digit, NumberStyles.Currency | NumberStyles.AllowExponent, CultureInfo.InvariantCulture), LiteralFormat.DecimalNumber);
475
return new Token(Tokens.LiteralDouble, x, y, sb.ToString(), Double.Parse(digit, CultureInfo.InvariantCulture), LiteralFormat.DecimalNumber);
477
} catch (FormatException) {
478
errors.Error(Line, Col, String.Format("{0} is not a parseable number", digit));
480
return new Token(Tokens.LiteralSingle, x, y, sb.ToString(), 0f, LiteralFormat.DecimalNumber);
482
return new Token(Tokens.LiteralDecimal, x, y, sb.ToString(), 0m, LiteralFormat.DecimalNumber);
484
return new Token(Tokens.LiteralDouble, x, y, sb.ToString(), 0.0, LiteralFormat.DecimalNumber);
488
token = new Token(Tokens.LiteralInteger, x, y, sb.ToString(), Int32.Parse(digit, ishex ? NumberStyles.HexNumber : NumberStyles.Number), ishex ? LiteralFormat.HexadecimalNumber : LiteralFormat.DecimalNumber);
489
} catch (Exception) {
491
token = new Token(Tokens.LiteralInteger, x, y, sb.ToString(), Int64.Parse(digit, ishex ? NumberStyles.HexNumber : NumberStyles.Number), ishex ? LiteralFormat.HexadecimalNumber : LiteralFormat.DecimalNumber);
492
} catch (FormatException) {
493
errors.Error(Line, Col, String.Format("{0} is not a parseable number", digit));
494
// fallback, when nothing helps :)
495
token = new Token(Tokens.LiteralInteger, x, y, sb.ToString(), 0, LiteralFormat.DecimalNumber);
496
} catch (OverflowException) {
497
errors.Error(Line, Col, String.Format("{0} is too long for a integer literal", digit));
498
// fallback, when nothing helps :)
499
token = new Token(Tokens.LiteralInteger, x, y, sb.ToString(), 0, LiteralFormat.DecimalNumber);
502
token.next = nextToken;
506
void ReadPreprocessorDirective()
508
Location start = new Location(Col - 1, Line);
509
string directive = ReadIdent('#');
510
string argument = ReadToEndOfLine();
511
this.specialTracker.AddPreprocessingDirective(new PreprocessingDirective(directive, argument.Trim(), start, new Location(start.Column + directive.Length + argument.Length, start.Line)));
519
while ((nextChar = ReaderRead()) != -1) {
523
} else if (ch == '\n') {
524
errors.Error(Line, Col, String.Format("No return allowed inside Date literal"));
530
errors.Error(Line, Col, String.Format("End of File reached before Date literal terminated"));
532
return sb.ToString();
540
while ((nextChar = ReaderRead()) != -1) {
543
if (ReaderPeek() != -1 && ReaderPeek() == '"') {
549
} else if (ch == '\n') {
550
errors.Error(Line, Col, String.Format("No return allowed inside String literal"));
556
errors.Error(Line, Col, String.Format("End of File reached before String terminated "));
558
return sb.ToString();
563
Location startPos = new Location(Col, Line);
565
StringBuilder curWord = specialCommentHash != null ? new StringBuilder() : null;
566
int missingApostrophes = 2; // no. of ' missing until it is a documentation comment
568
while ((nextChar = ReaderRead()) != -1) {
569
char ch = (char)nextChar;
571
if (HandleLineEnd(ch)) {
577
if (missingApostrophes > 0) {
578
if (ch == '\'' || ch == '\u2018' || ch == '\u2019') {
579
if (--missingApostrophes == 0) {
580
specialTracker.StartComment(CommentType.Documentation, isAtLineBegin, startPos);
584
specialTracker.StartComment(CommentType.SingleLine, isAtLineBegin, startPos);
585
missingApostrophes = 0;
589
if (specialCommentHash != null) {
590
if (Char.IsLetter(ch)) {
593
string tag = curWord.ToString();
595
if (specialCommentHash.ContainsKey(tag)) {
596
Location p = new Location(Col, Line);
597
string comment = ch + ReadToEndOfLine();
598
this.TagComments.Add(new TagComment(tag, comment, isAtLineBegin, p, new Location(Col, Line)));
605
if (missingApostrophes > 0) {
606
specialTracker.StartComment(CommentType.SingleLine, isAtLineBegin, startPos);
608
specialTracker.AddString(sb.ToString());
609
specialTracker.FinishComment(new Location(Col, Line));
612
Token ReadOperator(char ch)
618
switch (ReaderPeek()) {
621
return new Token(Tokens.PlusAssign, x, y);
625
return new Token(Tokens.Plus, x, y);
627
switch (ReaderPeek()) {
630
return new Token(Tokens.MinusAssign, x, y);
634
return new Token(Tokens.Minus, x, y);
636
switch (ReaderPeek()) {
639
return new Token(Tokens.TimesAssign, x, y);
643
return new Token(Tokens.Times, x, y, "*");
645
switch (ReaderPeek()) {
648
return new Token(Tokens.DivAssign, x, y);
652
return new Token(Tokens.Div, x, y);
654
switch (ReaderPeek()) {
657
return new Token(Tokens.DivIntegerAssign, x, y);
661
return new Token(Tokens.DivInteger, x, y);
663
switch (ReaderPeek()) {
666
return new Token(Tokens.ConcatStringAssign, x, y);
670
return new Token(Tokens.ConcatString, x, y);
672
switch (ReaderPeek()) {
675
return new Token(Tokens.PowerAssign, x, y);
679
return new Token(Tokens.Power, x, y);
681
return new Token(Tokens.Colon, x, y);
683
return new Token(Tokens.Assign, x, y);
685
switch (ReaderPeek()) {
688
return new Token(Tokens.LessEqual, x, y);
691
return new Token(Tokens.NotEqual, x, y);
694
switch (ReaderPeek()) {
697
return new Token(Tokens.ShiftLeftAssign, x, y);
701
return new Token(Tokens.ShiftLeft, x, y);
703
return new Token(Tokens.LessThan, x, y);
705
switch (ReaderPeek()) {
708
return new Token(Tokens.GreaterEqual, x, y);
711
if (ReaderPeek() != -1) {
712
switch (ReaderPeek()) {
715
return new Token(Tokens.ShiftRightAssign, x, y);
720
return new Token(Tokens.ShiftRight, x, y);
722
return new Token(Tokens.GreaterThan, x, y);
724
return new Token(Tokens.Comma, x, y);
726
// Prevent OverflowException when Peek returns -1
727
int tmp = ReaderPeek();
728
if (tmp > 0 && Char.IsDigit((char)tmp)) {
729
return ReadDigit('.', Col);
731
return new Token(Tokens.Dot, x, y);
733
return new Token(Tokens.OpenParenthesis, x, y);
735
return new Token(Tokens.CloseParenthesis, x, y);
737
return new Token(Tokens.OpenCurlyBrace, x, y);
739
return new Token(Tokens.CloseCurlyBrace, x, y);
741
return new Token(Tokens.QuestionMark, x, y);
743
return new Token(Tokens.ExclamationMark, x, y);
748
public override void SkipCurrentBlock(int targetToken)
751
int kind = base.lastToken.kind;
752
while (kind != Tokens.EOF &&
753
!(lastKind == Tokens.End && kind == targetToken))
757
kind = lastToken.kind;