2
/* **********************************************************************************
3
* Copyright (c) Roman Ivantsov
4
* This source code is subject to terms and conditions of the MIT License
5
* for Irony. A copy of the license can be found in the License.txt file
6
* at the root of this distribution.
7
* By using this source code in any fashion, you are agreeing to be bound by the terms of the MIT License.
9
* You must not remove this notice from this software.
10
* **********************************************************************************/
14
using System.Collections.Generic;
17
namespace Irony.CompilerServices {
18
#region About compound terminals
20
As it turns out, many terminal types in real-world languages have a 3-part structure: prefix-body-suffix.
21
The body is essentially the terminal "value", while prefix and suffix are used to specify additional
22
information (options), while not being a part of the terminal itself.
24
1. c# numbers may have a 0x prefix for hex representation, and suffixes specifying
25
the exact data type of the literal (f, l, m, etc)
26
2. c# string may have "@" prefix which disables escaping inside the string
27
3. c# identifiers may have "@" prefix and escape sequences inside - just like strings
28
4. Python string may have "u" and "r" prefixes, "r" working the same way as @ in c# strings
29
5. VB string literals may have "c" suffix identifying that the literal is a character, not a string
30
6. VB number literals and identifiers may have suffixes identifying data type
32
So it seems like all these terminals have the format "prefix-body-suffix".
33
The CompoundTerminalBase base class implements base functionality supporting this multi-part structure.
34
The IdentifierTerminal, NumberLiteral and StringLiteral classes inherit from this base class.
35
The methods in TerminalFactory static class demonstrate that with this architecture we can define the whole
36
variety of terminals for c#, Python and VB.NET languages.
41
/// <summary>
/// Maps an escape code character (for example 'n') to the character it stands for
/// (for example a line feed), as populated by GetDefaultEscapes.
/// </summary>
public class EscapeTable : Dictionary<char, char> { }
43
public abstract class CompoundTerminalBase : Terminal {
45
#region Nested classes
46
/// <summary>Maps a prefix string to the flag bits registered for it (see PrefixFlags).</summary>
protected class ScanFlagTable : Dictionary<string, short> { }
47
/// <summary>Maps a suffix string to the TypeCode values registered for it (see SuffixTypeCodes).</summary>
protected class TypeCodeTable : Dictionary<string, TypeCode[]> { }
49
/// <summary>
/// Working data collected while a compound terminal scans a single token:
/// flag bits, type codes, delimiter symbols and partial-token state.
/// An instance is attached to the resulting token by CreateToken.
/// </summary>
public class CompoundTokenDetails {
54
public short Flags; //need to be short, because we need to save it in Scanner state for Vs integration
56
//type codes taken from the matched suffix (assigned in ReadSuffix)
public TypeCode[] TypeCodes;
57
public string ControlSymbol_;
58
public string ExponentSymbol; //exponent symbol for Number literal
59
public string StartSymbol; //string start and end symbols
60
public string EndSymbol;
62
//partial token info, used by VS integration
63
public bool PartialOk;
64
public bool IsPartial;
65
public bool PartialContinues;
66
public byte SubTypeIndex; //used for string literal kind
68
/// <summary>Returns true when at least one bit of <paramref name="flag"/> is set in Flags.</summary>
public bool IsSet(short flag) {
69
return (Flags & flag) != 0;
75
#region constructors and initialization
76
/// <summary>
/// Creates the terminal with the given name (passed to the base constructor)
/// and installs the default escape table returned by GetDefaultEscapes.
/// </summary>
public CompoundTerminalBase(string name, TermOptions options) : base(name) {
78
Escapes = GetDefaultEscapes();
80
/// <summary>Convenience constructor: delegates to the two-argument constructor with TermOptions.None.</summary>
public CompoundTerminalBase(string name) : this(name, TermOptions.None) { }
82
/// <summary>Registers the flag bits for a prefix by adding an entry to PrefixFlags.</summary>
/// <param name="prefix">Prefix text used as the dictionary key.</param>
/// <param name="flags">Flag bits to associate with the prefix.</param>
protected void AddPrefixFlag(string prefix, short flags) {
83
PrefixFlags.Add(prefix, flags);
86
/// <summary>Registers the type codes for a suffix by adding an entry to SuffixTypeCodes.</summary>
/// <param name="suffix">Suffix text used as the dictionary key.</param>
/// <param name="codes">Type codes to associate with the suffix.</param>
public void AddSuffixCodes(string suffix, params TypeCode[] codes) {
87
SuffixTypeCodes.Add(suffix, codes);
92
#region public Properties/Fields
93
public Char EscapeChar = '\\';
94
public EscapeTable Escapes = new EscapeTable();
98
#region private fields
99
protected readonly ScanFlagTable PrefixFlags = new ScanFlagTable();
100
protected readonly TypeCodeTable SuffixTypeCodes = new TypeCodeTable();
101
protected StringList Prefixes = new StringList();
102
protected StringList Suffixes = new StringList();
103
protected bool CaseSensitive; //case sensitivity for prefixes and suffixes
104
string _prefixesFirsts; //first chars of all prefixes, for fast prefix detection
105
string _suffixesFirsts; //first chars of all suffixes, for fast suffix detection
109
#region overrides: Init, TryMatch
110
/// <summary>
/// Runs base initialization, then prepares the prefix and suffix lookup data:
/// both lists are sorted with StringList.LongerFirst, and the first character of
/// every prefix and every suffix is collected into _prefixesFirsts and _suffixesFirsts.
/// </summary>
public override void Init(GrammarData grammarData) {
111
base.Init(grammarData);
112
//collect all suffixes, prefixes in lists and create strings of first chars for both
113
// NOTE(review): LongerFirst presumably orders longer strings first so they are tried before shorter ones - confirm
Prefixes.Sort(StringList.LongerFirst);
114
_prefixesFirsts = string.Empty;
115
foreach (string pfx in Prefixes)
116
_prefixesFirsts += pfx[0];
118
Suffixes.Sort(StringList.LongerFirst);
119
_suffixesFirsts = string.Empty;
120
foreach (string sfx in Suffixes)
121
_suffixesFirsts += sfx[0]; //we don't care if there are repetitions
122
//for case-insensitive matching keep both the lower-case and upper-case form of every first char
if (!CaseSensitive) {
123
_prefixesFirsts = _prefixesFirsts.ToLower() + _prefixesFirsts.ToUpper();
124
_suffixesFirsts = _suffixesFirsts.ToLower() + _suffixesFirsts.ToUpper();
128
public override IList<string> GetFirsts() {
132
/// <summary>
/// Scans one compound token from the source. When the scanner state value is 0,
/// QuickParse is attempted first and the prefix is read; the body is then read,
/// and the token is built by CreateToken after the suffix is read and the value converted.
/// An error recorded in details.Error, or a failed ConvertValue, is reported through
/// context.CreateErrorTokenAndReportError. For a partial token the sub-type index, flags
/// and terminal index are saved into context.ScannerState.
/// </summary>
public override Token TryMatch(CompilerContext context, ISourceStream source) {
134
//Try quick parse first, but only if we're not continuing
135
if (context.ScannerState.Value == 0) {
136
token = QuickParse(context, source);
137
if (token != null) return token;
138
source.Position = source.TokenStart.Position; //revert the position
141
CompoundTokenDetails details = new CompoundTokenDetails();
142
InitDetails(context, details);
144
//a prefix is only looked for when no earlier scanner state is being continued
if (context.ScannerState.Value == 0)
145
ReadPrefix(source, details);
146
if (!ReadBody(source, details))
148
if (details.Error != null)
149
return context.CreateErrorTokenAndReportError(source.TokenStart, source.CurrentChar.ToString(), details.Error);
150
//for a partial token the raw body is used as the value
if (details.IsPartial) {
151
details.Value = details.Body;
153
ReadSuffix(source, details);
155
if (!ConvertValue(details))
156
return context.CreateErrorTokenAndReportError(source.TokenStart, source.CurrentChar.ToString(), "Failed to convert the value: " + details.Error);
158
token = CreateToken(context, source, details);
160
if (details.IsPartial) {
161
//Save terminal state so we can continue
162
context.ScannerState.TokenSubType = (byte)details.SubTypeIndex;
163
context.ScannerState.TerminalFlags = (short)details.Flags;
164
context.ScannerState.TerminalIndex = this.MultilineIndex;
166
context.ScannerState.Value = 0;
170
/// <summary>
/// Builds the Token for the current lexeme using details.Value as its value,
/// attaches the details object to the token, and sets the IsIncomplete flag
/// when the details are marked as partial.
/// </summary>
protected virtual Token CreateToken(CompilerContext context, ISourceStream source, CompoundTokenDetails details) {
171
string lexeme = source.GetLexeme();
172
Token token = new Token(this, source.TokenStart, lexeme, details.Value);
173
token.Details = details;
174
if (details.IsPartial)
175
token.Flags |= TokenFlags.IsIncomplete;
179
/// <summary>
/// Seeds the partial-token settings of a fresh details object:
/// PartialOk is true when the context is in VsLineScan mode, and
/// PartialContinues is true when the scanner state value is non-zero.
/// </summary>
protected virtual void InitDetails(CompilerContext context, CompoundTokenDetails details) {
180
details.PartialOk = (context.Mode == CompileMode.VsLineScan);
181
details.PartialContinues = (context.ScannerState.Value != 0);
184
protected virtual Token QuickParse(CompilerContext context, ISourceStream source) {
188
/// <summary>
/// Tries to match one of the registered prefixes at the current source position.
/// The current character is first checked against _prefixesFirsts as a cheap filter.
/// On a match the prefix is stored in details.Prefix, the source position advances
/// past it, and any flags registered for that prefix are OR-ed into details.Flags.
/// </summary>
protected virtual void ReadPrefix(ISourceStream source, CompoundTokenDetails details) {
189
if (_prefixesFirsts.IndexOf(source.CurrentChar) < 0)
191
foreach (string pfx in Prefixes) {
192
//second argument asks MatchSymbol to ignore case when the terminal is not case sensitive
if (!source.MatchSymbol(pfx, !CaseSensitive)) continue;
194
details.Prefix = pfx;
195
source.Position += pfx.Length;
196
//Set flag from prefix
198
if (!string.IsNullOrEmpty(details.Prefix) && PrefixFlags.TryGetValue(details.Prefix, out pfxFlags))
199
details.Flags |= (short) pfxFlags;
204
protected virtual bool ReadBody(ISourceStream source, CompoundTokenDetails details) {
208
/// <summary>
/// Tries to match one of the registered suffixes at the current source position.
/// Returns immediately when the current character is not in _suffixesFirsts.
/// On a match the suffix is stored in details.Suffix, the source position advances
/// past it, and any type codes registered for that suffix are assigned to details.TypeCodes.
/// </summary>
protected virtual void ReadSuffix(ISourceStream source, CompoundTokenDetails details) {
209
if (_suffixesFirsts.IndexOf(source.CurrentChar) < 0) return;
210
foreach (string sfx in Suffixes) {
211
//second argument asks MatchSymbol to ignore case when the terminal is not case sensitive
if (!source.MatchSymbol(sfx, !CaseSensitive)) continue;
213
details.Suffix = sfx;
214
source.Position += sfx.Length;
215
//Set TypeCode from suffix
217
if (!string.IsNullOrEmpty(details.Suffix) && SuffixTypeCodes.TryGetValue(details.Suffix, out codes))
218
details.TypeCodes = codes;
223
/// <summary>
/// Default value conversion: the scanned body text is used as the token value unchanged.
/// Virtual, so derived terminals can supply their own conversion.
/// </summary>
protected virtual bool ConvertValue(CompoundTokenDetails details) {
224
details.Value = details.Body;
231
#region utils: GetDefaultEscapes
232
public static EscapeTable GetDefaultEscapes() {
233
EscapeTable escapes = new EscapeTable();
234
escapes.Add('a', '\u0007');
235
escapes.Add('b', '\b');
236
escapes.Add('t', '\t');
237
escapes.Add('n', '\n');
238
escapes.Add('v', '\v');
239
escapes.Add('f', '\f');
240
escapes.Add('r', '\r');
241
escapes.Add('"', '"');
242
escapes.Add('\'', '\'');
243
escapes.Add('\\', '\\');
244
escapes.Add(' ', ' ');
245
escapes.Add('\n', '\n'); //this is a special escape of the linebreak itself,
246
// when string ends with "\" char and continues on the next line