17
17
// MA 02111-1307 USA
19
19
#include "Puma/CScanner.h"
20
#include "Puma/SB_Sequential.h"
21
#include "Puma/SB_WholeFile.h"
22
#include "Puma/SB_String.h"
23
20
#include "Puma/ErrorStream.h"
24
21
#include "Puma/Source.h"
25
22
#include "Puma/Token.h"
26
23
#include "Puma/Location.h"
27
24
#include "Puma/Unit.h"
28
25
#include "Puma/CTokens.h"
26
#include "Puma/ScanBuffer.h"
47
// Builds the next token by dispatching on the current state of the scan
// buffer. In every visible branch the token is stored in `result`.
// NOTE(review): this listing has gaps (the bare numbers are residual line
// numbers); the declarations of `expr`, `len` and `result`, the remaining
// cases and the return statement are not visible here -- confirm against
// the complete file.
Token *CScanner::scan () {
48
CRecognizer::Lang lang;
52
switch (buffer ().state ()) {
53
// buffer is usable: let the recognizer classify the next piece of input
case ScanBuffer::STATE_OK:
54
switch (recognize (lang, expr, len)) {
55
// recognize() returned -1: hand out an error token
case -1: result = new Token (Token::ID_ERROR); break;
57
// recognize() returned 1: build a regular token from lang/expr/len
case 1 : result = make_token (lang, expr, len); break;
60
// buffer reports its end state: hand out an end-of-file token
case ScanBuffer::STATE_END:
61
result = new Token (Token::ID_END_OF_FILE);
63
// buffer reports an error state: hand out an error token
case ScanBuffer::STATE_ERROR:
64
result = new Token (Token::ID_ERROR);
72
47
// Returns the Token language id that corresponds to a CRecognizer::Lang
// value.
// NOTE(review): the enclosing switch statement and any default case are not
// visible in this listing (the bare numbers are residual line numbers).
LanguageID CScanner::map_lang (CRecognizer::Lang lang) {
74
49
case CRecognizer::COMMENT: return Token::comment_id;
76
51
case CRecognizer::COMP_DIR: return Token::dir_id;
77
52
case CRecognizer::WHITE: return Token::white_id;
78
53
case CRecognizer::PRE: return Token::macro_op_id;
79
// STRING shares Token::cpp_id with CORE below
case CRecognizer::STRING: return Token::cpp_id;
80
54
case CRecognizer::CORE: return Token::cpp_id;
81
55
case CRecognizer::KEYWORD: return Token::keyword_id;
82
56
case CRecognizer::ID: return Token::identifier_id;
92
66
char short_buffer[512];
93
67
char *tok_buffer = short_buffer;
70
int offset = loc.column ();
71
Mode old_mode = mode ();
96
73
// dynamically allocate a huge buffer
97
if (len >= (int)sizeof (short_buffer))
74
if (len >= (int)sizeof (short_buffer) - 1)
98
75
tok_buffer = new char[len + 1];
100
char *src = buffer ().token ();
101
char *dest = tok_buffer;
102
char *end = src + len;
107
if (*src == '\\' && src + 1 < end && *(src + 1) == '\n') {
110
if (!cl) cl = new Array<int>(10,10);
114
else if (*src == '\\' && src + 2 < end && *(src + 1) == '\x0d' &&
115
*(src + 2) == '\n') {
118
if (!cl) cl = new Array<int>(10,10);
133
buffer ().accept (len);
77
// select the correct language id
135
78
if (lang == CRecognizer::UNKNOWN)
136
79
expr = Token::ID_UNKNOWN;
138
// select the correct language id
142
if (mode () == CRecognizer::IN_COMP_DIR && lang != CRecognizer::COMMENT)
81
if (old_mode == CRecognizer::IN_COMP_DIR && lang != CRecognizer::COMMENT)
143
82
lid = Token::dir_id;
145
84
if (lang == CRecognizer::CORE && expr == TOK_OPEN_ROUND)
152
91
lid = map_lang (lang);
155
result = new Token (expr, lid, tok_buffer);
156
result->location (loc);
158
// set the next token location
160
loc.setup (loc.filename (), loc.line () + rows);
162
// attach the continuation line marks
163
if (cl) result->cont_lines (cl);
94
// volatile int z, s = 0;
95
// for (z = 0; z < 100; z++)
97
char *src = buffer ().token ();
98
if (!buffer ().new_line (len)) {
99
// a token without newline => can be copied directly
100
memcpy (tok_buffer, src, len);
101
tok_buffer[len] = '\0';
102
result = new Token (expr, lid, tok_buffer);
103
result->location (loc);
104
loc.setup (loc.filename (), loc.line (), loc.column () + len);
107
// special token with newline => more complicated
108
char *dest = tok_buffer;
109
char *end = src + len;
114
if (*src == '\\' && src + 1 < end && *(src + 1) == '\n') {
119
if (!cl) cl = new Array<int>(10,10);
123
else if (*src == '\\' && src + 2 < end && *(src + 1) == '\x0d' &&
124
*(src + 2) == '\n') {
129
if (!cl) cl = new Array<int>(10,10);
151
result = new Token (expr, lid, tok_buffer);
152
result->location (loc);
154
// set the next token location
155
if (rows > 0 || columns > 0)
156
loc.setup (loc.filename (), loc.line () + rows, offset + columns);
158
// attach the continuation line marks
159
if (cl) result->cont_lines (cl);
161
// eventually accept the token
162
buffer ().accept (len);
165
164
// free a dynamically allocated huge buffer
166
165
if (len >= (int)sizeof (short_buffer))
173
172
// Scans tokens and appends them to `unit`, reporting scan problems on `err`.
// NOTE(review): this listing appears to interleave two variants of the loop:
// one that pulls tokens from scan() and inspects token->type(), and one that
// polls buffer ().state () and calls recognize()/make_token() directly.
// Lines are also missing between the visible statements (the bare numbers
// are residual line numbers) -- confirm against the complete file.
void CScanner::scan_all (Unit &unit) {
176
// start the location at line 1 of the unit; the fixed placeholder name is
// used when unit.name () yields a null pointer
loc.setup (unit.name () ? unit.name () : "<anonymous unit>", 1L);
178
// variant A: fetch tokens through scan () until the end-of-file token
while ((token = scan ())->type () != Token::ID_END_OF_FILE) {
179
if (token->type () == Token::ID_UNKNOWN)
180
err << sev_error << token->location ()
181
<< "Unknown token" << endMessage;
182
else if (token->type () == Token::ID_ERROR) {
183
err << sev_error << token->location ()
184
<< "Error while scanning tokens" << endMessage;
174
// same setup as above, but with an explicit column argument of 1
loc.setup (unit.name () ? unit.name () : "<anonymous unit>", 1, 1);
176
// variant B: loop while the buffer state is STATE_OK or STATE_NEW
while (buffer ().state () == CScanBuffer::STATE_OK ||
177
buffer ().state () == CScanBuffer::STATE_NEW) {
179
CRecognizer::Lang lang;
181
int result = recognize (lang, expr, len);
183
// if (lang == WHITE && mode () == NORMAL && !buffer ().new_line (len)) {
184
// // eventually accept the token
185
// buffer ().accept (len);
188
// if (lang == COMMENT) {
189
// // eventually accept the token
190
// buffer ().accept (len);
195
// build the token and append it to the unit
Token *new_token = make_token (lang, expr, len);
196
unit.append (*new_token);
197
// cout << loc << " token " << new_token->type () << ": " << new_token->text () << endl;
200
// in this variant the error is reported at the scanner's current `loc`
err << sev_error << loc
201
<< "Error while scanning tokens" << endMessage;
186
} else if (token->type () == Token::ID_WARNING)
187
err << sev_warning << token->location ()
188
<< "Warning while scanning tokens" << endMessage;
190
// the token is appended to the unit after the diagnostics above
unit.append (*token);
192
if (token->type () == Token::ID_END_OF_FILE)
197
208
// Prepares a scan buffer for the Source `in`; `unit` is presumably filled by
// a scan call that is not visible in this listing.
// NOTE(review): this listing appears to interleave two variants: one that
// chooses between SB_WholeFile and SB_Sequential buffers, and one that reads
// the whole input into a heap array and hands it to buffer ().init ().
// Lines are missing between the visible statements (the bare numbers are
// residual line numbers) -- confirm against the complete file.
void CScanner::fill_unit (Source &in, Unit &unit) {
198
// positive size reported by the source: use the whole-file buffer
if (in.size () > 0) {
199
SB_WholeFile whole_file_buffer;
201
whole_file_buffer.init (err, in);
202
setup (whole_file_buffer);
205
// presumably the branch for sources without a positive size -- the
// `else` itself is not visible here
SB_Sequential seq_buffer;
207
seq_buffer.init (in);
210
int size = in.size ();
214
err << sev_error << "can't scan file of unknown size" << endMessage;
217
// read `size` bytes into a freshly allocated array; a read that returns a
// different count is reported as an error
char *buf = new char[size];
218
if (in.read (buf, size) != size) {
219
err << sev_error << "can't load input file" << endMessage;
223
// NOTE(review): no matching delete[] for `buf` is visible in this
// listing -- check who releases it
buffer ().init (buf, size);
214
229
// Overload for an in-memory C string: wraps `in` in an SB_String buffer and
// passes that buffer to setup ().
// NOTE(review): the rest of the function (presumably the call that scans
// into `unit`) is not visible in this listing.
void CScanner::fill_unit (const char *in, Unit &unit) {
215
SB_String string_buffer;
217
string_buffer.init (in);
218
setup (string_buffer);