~ubuntu-branches/ubuntu/hoary/malaga/hoary

Committer: Bazaar Package Importer
Author(s): Thomas Bushnell, BSG
Date: 2004-08-20 12:58:50 UTC
mfrom: (1.1.1 upstream)
Revision ID: james.westby@ubuntu.com-20040820125850-rx9s8bn0ep8jgist

Tags: 6.13-4

This should have been urgency=high, because it is an important and
long-delayed accommodation to new upstream with a bajillion bug fixes.

files added:
COPYING.txt

Makefile.in

README.txt

allomorphs.c

allomorphs.h

analysis.c

analysis.h

avl_trees.c

avl_trees.h

basic.c

basic.h

breakpoints.c

breakpoints.h

cache.c

cache.h

canvas.c

canvas.h

commands.c

commands.h

debian/postinst-libmalaga6

debugger.c

debugger.h

display.c

display.h

errs

expressions.c

expressions.h

files.c

files.h

generation.c

generation.h

grammars/formal/choose_count.mor

grammars/formal/choose_count.pro

grammars/formal/growing_blocks.mor

grammars/formal/growing_blocks.pro

grammars/formal/max_count.mor

grammars/formal/max_count.pro

grammars/formal/palindrome.mor

grammars/formal/palindrome.pro

grammars/formal/quadratic.mor

grammars/formal/quadratic.pro

grammars/formal/repeat_word.mor

grammars/formal/repeat_word.pro

grammars/formal/same_count_mixed.mor

grammars/formal/same_count_mixed.pro

grammars/formal/same_count_with_noise.mor

grammars/formal/same_count_with_noise.pro

hangul.c

hangul.h

input.c

input.h

ksc_table.c

ksc_table.h

lex_compiler.c

lex_compiler.h

lexicon.c

lexicon.h

libmalaga.c

libmalaga.h

ltmain.sh

malaga.c

malaga.h

malaga.info

malaga.texi

malaga_files.c

malaga_files.h

malaga_lib.c

malaga_lib.h

maldump.c

mallex.c

malmake.c

malrul.c

malshow.c

malsym.c

options.c

options.h

patterns.c

patterns.h

pools.c

pools.h

result.c

result.h

rule_code.c

rule_code.h

rule_compiler.c

rule_compiler.h

rule_parser.c

rule_parser.h

rule_symbols.c

rule_symbols.h

rule_type.h

rules.c

rules.h

scanner.c

scanner.h

sym_compiler.c

sym_compiler.h

symbols.c

symbols.h

transmit.c

transmit.h

tree.c

tree.h

tries.c

tries.h

value_parser.c

value_parser.h

values.c

values.h

variables.c

variables.h

files removed:
LICENSE.txt

Makefile

debian/bin-README.Debian

debian/dev-README.Debian

debian/postinst-libmalaga-dev

debian/postinst-libmalaga2

debian/prerm-libmalaga-dev

debian/prerm-libmalaga2

debian/prerm-malaga-bin

doc/Makefile

doc/malaga.dvi

doc/malaga.html

doc/malaga.htoc

doc/malaga.tex

doc/malaga001.html

doc/malaga002.html

doc/malaga003.html

doc/malaga004.html

doc/malaga005.html

doc/malaga006.html

doc/malaga007.html

grammars/Makefile

grammars/formal/formal1.mor

grammars/formal/formal1.pro

grammars/formal/formal2.mor

grammars/formal/formal2.pro

grammars/formal/formal3.mor

grammars/formal/formal3.pro

grammars/formal/formal4.mor

grammars/formal/formal4.pro

grammars/formal/formal5.mor

grammars/formal/formal5.pro

grammars/formal/formal6.mor

grammars/formal/formal6.pro

grammars/formal/formal7.mor

grammars/formal/formal7.pro

grammars/formal/formal8.mor

grammars/formal/formal8.pro

man/man1

man/man1/malaga.1

man/man1/maldump.1

man/man1/mallex.1

man/man1/malmake.1

man/man1/malrul.1

man/man1/malsym.1

source

source/.pure

source/Makefile.in

source/analysis.c

source/analysis.h

source/basic.c

source/basic.h

source/breakpoints.c

source/breakpoints.h

source/cache.c

source/cache.h

source/commands.c

source/commands.h

source/debugger.c

source/debugger.h

source/display.c

source/display.h

source/files.c

source/files.h

source/generation.c

source/generation.h

source/hangul.c

source/hangul.h

source/input.c

source/input.h

source/lex_compiler.c

source/lex_compiler.h

source/lexicon.c

source/lexicon.h

source/libmalaga.c

source/libmalaga.h

source/libmatrix.c

source/malaga.c

source/malaga_files.c

source/malaga_files.h

source/malaga_lib.c

source/malaga_lib.h

source/maldump.c

source/mallex.c

source/malmake.c

source/malrul.c

source/malsym.c

source/options.c

source/options.h

source/patterns.c

source/patterns.h

source/pools.c

source/pools.h

source/rule_code.c

source/rule_code.h

source/rule_compiler.c

source/rule_compiler.h

source/rule_parser.c

source/rule_parser.h

source/rule_symbols.c

source/rule_symbols.h

source/rule_type.h

source/rules.c

source/rules.h

source/scanner.c

source/scanner.h

source/sym_compiler.c

source/sym_compiler.h

source/symbols.c

source/symbols.h

source/transmit.c

source/transmit.h

source/tries.c

source/tries.h

source/value_parser.c

source/value_parser.h

source/values.c

source/values.h

tcl/allomorph.tcl

tcl/display.tcl

tcl/path.tcl

tcl/result.tcl

tcl/selector.tcl

tcl/tclIndex

tcl/tools.tcl

tcl/tree.tcl

tcl/variables.tcl

files modified:
CHANGES.txt

INSTALL.txt

config.guess

config.sub

configure

configure.in

debian/changelog

debian/control

debian/doc-control

debian/postinst-malaga-bin

debian/postinst-malaga-doc

debian/prerm-malaga-doc

debian/rules

debian/shlibs.lib

grammars/formal/formal.all

grammars/german/german.all

grammars/german/german.esym

grammars/german/german.lex

grammars/german/german.mor

grammars/german/german.pro

grammars/german/german.sym

grammars/german/german.syn

grammars/numeral/numeral.all

grammars/numeral/numeral.mor

grammars/numeral/numeral.pro

malaga.el

Show diffs side-by-side

added added

removed removed

source/scanner.c

/* This file is part of Malaga, a system for Natural Language Analysis.

* Bjoern Beutel

* Universitaet Erlangen-Nuernberg

* Abteilung fuer Computerlinguistik

* Bismarckstrasse 12

* D-91054 Erlangen

* e-mail: malaga@linguistik.uni-erlangen.de

* This program is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

* the Free Software Foundation; either version 2 of the License, or

* (at your option) any later version.

* This program is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

* GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with this program; if not, write to the Free Software

* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */

/* description ==============================================================*/

/* This module supports scanning (lexical analysis) of malaga source files. */

/* includes =================================================================*/

#include <ctype.h>

#include <string.h>

#include <stdio.h>

#include <errno.h>

#include "basic.h"

#include "files.h"

#undef GLOBAL

#define GLOBAL

#include "scanner.h"

/* constants ================================================================*/

#define INCLUDE_LEVEL_MAX 10 /* maximum number of nested includes */

LOCAL struct { string_t name; int_t code; } keywords[NUMBER_OF_KEYWORDS] =

/* Table of all keywords and their token codes.

 * keyword () looks names up in this table by binary search, comparing with

 * strcmp_no_case, so this list must be maintained in alphabetical order.

 * token_as_text () scans the table linearly by token code.

 * NOTE(review): NUMBER_OF_KEYWORDS is defined outside this file; it is

 * presumably equal to the number of entries below (38) -- confirm. */

{

{ "accept", TOK_ACCEPT },

{ "allo_rule", TOK_ALLO_RULE },

{ "and", TOK_AND },

{ "assert", TOK_ASSERT },

{ "choose", TOK_CHOOSE },

{ "combi_rule", TOK_COMBI_RULE },

{ "define", TOK_DEFINE },

{ "else", TOK_ELSE },

{ "elseif", TOK_ELSEIF },

{ "end", TOK_END },

{ "end_rule", TOK_END_RULE },

{ "error", TOK_ERROR },

{ "fail", TOK_FAIL },

{ "foreach", TOK_FOREACH },

{ "greater", TOK_GREATER },

{ "greater_equal", TOK_GREATER_EQUAL },

{ "if", TOK_IF },

{ "in", TOK_IN },

{ "include", TOK_INCLUDE },

{ "initial", TOK_INITIAL },

{ "input_filter", TOK_INPUT_FILTER },

{ "less", TOK_LESS },

{ "less_equal", TOK_LESS_EQUAL },

{ "matches", TOK_MATCHES },

{ "not", TOK_NOT },

{ "or", TOK_OR },

{ "output_filter", TOK_OUTPUT_FILTER },

{ "parallel", TOK_PARALLEL },

{ "pruning_rule", TOK_PRUNING_RULE },

{ "repeat", TOK_REPEAT },

{ "require", TOK_REQUIRE },

{ "result", TOK_RESULT },

{ "return", TOK_RETURN },

{ "robust_rule", TOK_ROBUST_RULE },

{ "rules", TOK_RULES },

{ "subrule", TOK_SUBRULE },

{ "then", TOK_THEN },

{ "while", TOK_WHILE }

};

/* types ====================================================================*/

typedef struct /* a source stream for lexical analysis */

{

FILE *stream; /* the input stream for this include level */

string_t file_name; /* the name of the input file; begin_include () stores
                     * the pointer it is given, not a copy */

int_t column; /* column that has been read; read_next_char () resets it to 0
               * at a newline and advances it to the next multiple of 8 at
               * a tab */

int_t line_number; /* number of the line that has been read; set to 1 by
                    * begin_include () and incremented at each newline */

} source_t;

100

101

/* variables ================================================================*/

102

103

LOCAL source_t sources[INCLUDE_LEVEL_MAX];

104

/* For each include level, we define a source stream description. */

105

106

LOCAL int_t include_level = 0; /* current include level */

107

108

LOCAL source_t *source = NULL; /* points to <sources>[<include_level>-1] */

109

110

LOCAL string_t scanner_input = NULL;

111

/* If no file is included, the scanner reads its input from <scanner_input> */

112

113

LOCAL int_t next_char; /* the next char to be read */

114

115

LOCAL text_t token_text; /* the text of the next token. */

116

117

/* functions ================================================================*/

118

119

LOCAL void read_next_char (void)

120

/* Read the next char from input into <next_char>.

121

* If end of input stream is reached, return EOF.

122

* If no input stream is selected, read input from <input_buffer>

123

* If reading from stream, update column information. */

124

{

125

if (source != NULL)

126

{

127

next_char = getc (source->stream);

128

129

if (next_char == EOF && ferror (source->stream))

130

error ("can't read from \"%s\": %s",

131

source->file_name, strerror (errno));

132

133

if (next_char == '\t')

134

source->column = (source->column + 8) & ~7;

135

else if (next_char == '\n')

136

{

137

source->column = 0;

138

source->line_number++;

139

}

140

else

141

source->column++;

142

}

143

else if (scanner_input != NULL && *scanner_input != EOS)

144

next_char = *scanner_input++;

145

else

146

{

147

scanner_input = NULL;

148

next_char = EOF;

149

}

150

}

151

152

/*---------------------------------------------------------------------------*/

153

154

LOCAL void read_next_char_again (void)

155

/* Like "read_next_char", but don't update column information. */

156

{

157

if (source != NULL)

158

{

159

next_char = getc (source->stream);

160

161

if (next_char == EOF && ferror (source->stream))

162

error ("can't read from \"%s\"", source->file_name);

163

}

164

else if (scanner_input != NULL && *scanner_input != EOS)

165

next_char = *scanner_input++;

166

else

167

{

168

scanner_input = NULL;

169

next_char = EOF;

170

}

171

}

172

173

/*---------------------------------------------------------------------------*/

174

175

GLOBAL string_t current_file_name (void)
/* Return the name of the file that is currently being read,
 * or NULL if the scanner is not reading from a file. */
{
  return (source != NULL) ? source->file_name : NULL;
}

183

184

/*---------------------------------------------------------------------------*/

185

186

GLOBAL int_t current_line_number (void)
/* Return the number of the line in which the last char has been read,
 * or -1 if the scanner is not reading from a file. */
{
  if (source != NULL)
    return source->line_number;

  return -1;
}

194

195

/*---------------------------------------------------------------------------*/

196

197

GLOBAL int_t current_column (void)
/* Return the column in which the last char has been read,
 * or -1 if the scanner is not reading from a file. */
{
  if (source == NULL)
    return -1;

  /* Reported columns start with 0; a stored column of 0 is reported as 0. */
  return (source->column != 0) ? source->column - 1 : 0;
}

207

208

/*---------------------------------------------------------------------------*/

209

210

GLOBAL void set_scanner_input (string_t input)
/* Make the string <input> the scanner input and read its first token.
 * The scanner only keeps a pointer into <input>, so <input> must remain
 * valid until the scanner has done its work. */
{
  scanner_input = input;

  /* Fill the one-char lookahead, then the one-token lookahead. */
  read_next_char ();
  read_next_token ();
}

218

219

/*---------------------------------------------------------------------------*/

220

221

GLOBAL void begin_include (string_t file_name)
/* Open a new level of inclusion and read tokens from file <file_name>.
 * The pointer <file_name> itself is stored in the new source description.
 * On return, the first token of the included file is in <next_token>. */
{
  FILE *new_stream;

  if (include_level >= INCLUDE_LEVEL_MAX)
    error ("too many nested includes");

  new_stream = open_stream (file_name, "r");

  /* The lookahead char of the enclosing input must be read again once the
   * include has ended, so give it back to where it came from. */
  if (source != NULL)
    ungetc (next_char, source->stream);
  else if (scanner_input != NULL)
    scanner_input--;

  /* Enter the new include level and describe its source. */
  source = &sources[include_level++];
  source->stream = new_stream;
  source->file_name = file_name;
  source->line_number = 1;
  source->column = 0;

  /* Fill the one-char lookahead, then the one-token lookahead. */
  read_next_char ();
  read_next_token ();
}

246

247

/*---------------------------------------------------------------------------*/

248

249

GLOBAL void end_include (void)

250

/* Stop reading from current source stream and read from former stream. */

251

{

252

DB_ASSERT (include_level > 0);

253

254

close_stream (&source->stream, source->file_name);

255

256

include_level--;

257

if (include_level > 0)

258

source = sources + include_level - 1;

259

else

260

source = NULL;

261

262

if (source != NULL || scanner_input != NULL)

263

{

264

read_next_char_again ();

265

read_next_token ();

266

}

267

}

268

269

/*---------------------------------------------------------------------------*/

270

271

GLOBAL void stop_scanner (void)

272

/* Stop the scanner in case of an emergency. */

273

{

274

int_t i;

275

276

source = NULL;

277

scanner_input = NULL;

278

for (i = 0; i < include_level; i++)

279

close_stream (&sources[i].stream, NULL);

280

include_level = 0;

281

}

282

283

/*---------------------------------------------------------------------------*/

284

285

LOCAL void read_name (void)

286

/* Read rule name, variable, or keyword into <token_name>. */

287

{

288

clear_text (&token_text);

289

290

while (next_char != EOF &&

291

(next_char == '_' || next_char == '&' || next_char == '|'

292

|| IS_ALPHA (next_char) || isdigit (next_char)))

293

{

294

add_char_to_text (token_text, next_char);

295

read_next_char ();

296

}

297

298

token_name = text_string (token_text);

299

if (*token_name == EOS)

300

error ("illegal character in name");

301

}

302

303

/*---------------------------------------------------------------------------*/

304

305

LOCAL int_t keyword (string_t name)
/* Return the token code of <name> if it is found in the keyword table
 * (compared with strcmp_no_case). Otherwise return TOK_IDENT. */
{
  int_t low = 0;
  int_t high = NUMBER_OF_KEYWORDS - 1;

  /* Binary search: if <name> is a keyword, it is in keywords[low..high]. */
  while (low <= high)
  {
    int_t mid = (low + high) / 2;
    int_t order = strcmp_no_case (name, keywords[mid].name);

    if (order == 0)
      return keywords[mid].code;

    if (order < 0)
      high = mid - 1;
    else
      low = mid + 1;
  }

  return TOK_IDENT;
}

328

329

/*---------------------------------------------------------------------------*/

330

331

LOCAL void read_number (void)

332

/* Read a floating point number. Save its value in <token_number>. */

333

{

334

clear_text (&token_text);

335

336

while (isdigit (next_char))

337

{

338

add_char_to_text (token_text, next_char);

339

read_next_char ();

340

}

341

342

if (next_char == '.')

343

{

344

add_char_to_text (token_text, next_char);

345

read_next_char ();

346

347

if (! isdigit (next_char))

348

error ("missing digits after \".\"");

349

350

while (isdigit (next_char))

351

{

352

add_char_to_text (token_text, next_char);

353

read_next_char ();

354

}

355

}

356

357

if (next_char == 'E' || next_char == 'e') /* Read an exponent. */

358

{

359

add_char_to_text (token_text, next_char);

360

read_next_char ();

361

362

if (next_char == '-' || next_char == '+')

363

{

364

add_char_to_text (token_text, next_char);

365

read_next_char ();

366

}

367

368

if (! isdigit (next_char))

369

error ("missing exponent");

370

371

while (isdigit (next_char))

372

{

373

add_char_to_text (token_text, next_char);

374

read_next_char ();

375

}

376

}

377

token_name = text_string (token_text);

378

if (sscanf (token_name, "%lf", &token_number) != 1)

379

error ("illegal double value");

380

}

381

382

/*---------------------------------------------------------------------------*/

383

384

GLOBAL void read_next_token (void)

385

/* Read the next token from current source into <next_token>.

386

* If end of input stream is reached, return EOF. */

387

{

388

/* Read chars until a token has been recognised. */

389

while (TRUE)

390

{

391

switch (next_char)

392

{

393

case EOF:

394

next_token = EOF;

395

return;

396

397

case ' ': /* Read over whitespace. */

398

case '\t':

399

case '\n':

400

read_next_char ();

401

break;

402

403

case '#': /* Read over a comment. */

404

405

{

406

read_next_char ();

407

} while (next_char != '\n' && next_char != EOF);

408

break;

409

410

case '\"': /* Read a string. */

411

clear_text (&token_text);

412

read_next_char (); /* overread beginning '"' */

413

while (next_char != '\"')

414

{

415

if (next_char == '\\')

416

{

417

/* See if we get '\"'. */

418

read_next_char ();

419

if (next_char != '\"')

420

add_to_text (token_text, "\\");

421

}

422

423

if (next_char == EOF || next_char == '\n')

424

error ("unterminated string at end of line");

425

426

add_char_to_text (token_text, next_char);

427

read_next_char ();

428

}

429

read_next_char (); /* overread ending '"' */

430

free_mem (&token_string);

431

token_string = new_string (text_string (token_text), NULL);

432

next_token = TOK_STRING;

433

return;

434

435

case ':': /* Read a ":", ":=", ":=+", ":=-", ":=*", ":=/". */

436

read_next_char ();

437

if (next_char == '=')

438

{

439

read_next_char ();

440

if (next_char == '+')

441

{

442

next_token = TOK_ASSIGN_PLUS;

443

read_next_char ();

444

}

445

else if (next_char == '-')

446

{

447

next_token = TOK_ASSIGN_MINUS;

448

read_next_char ();

449

}

450

else if (next_char == '*')

451

{

452

next_token = TOK_ASSIGN_ASTERISK;

453

read_next_char ();

454

}

455

else if (next_char == '/')

456

{

457

next_token = TOK_ASSIGN_SLASH;

458

read_next_char ();

459

}

460

else

461

next_token = TOK_ASSIGN;

462

}

463

else

464

next_token = ':';

465

return;

466

467

case '/': /* Read a "/", a "/=" or a "/~". */

468

read_next_char ();

469

if (next_char == '=')

470

{

471

next_token = TOK_NOT_EQUAL;

472

read_next_char ();

473

}

474

else if (next_char == '~')

475

{

476

next_token = TOK_NOT_CONGRUENT;

477

read_next_char ();

478

}

479

else

480

next_token = '/';

481

return;

482

483

case '-':

484

read_next_char ();

485

if (! isdigit (next_char))

486

next_token = '-';

487

else

488

{

489

read_number ();

490

token_number = -token_number;

491

next_token = TOK_NUMBER;

492

}

493

return;

494

495

case '0': case '1': case '2': case '3': case '4':

496

case '5': case '6': case '7': case '8': case '9':

497

/* Read a number. */

498

read_number ();

499

next_token = TOK_NUMBER;

500

return;

501

502

case '$':

503

read_next_char ();

504

read_name ();

505

next_token = TOK_VARIABLE;

506

return;

507

508

case '@':

509

read_next_char ();

510

read_name ();

511

next_token = TOK_CONSTANT;

512

return;

513

514

default:

515

if (IS_ALPHA (next_char)

516

|| next_char == '_' || next_char == '&' || next_char == '|')

517

{

518

read_name ();

519

next_token = keyword (token_name);

520

return;

521

}

522

else

523

{

524

next_token = next_char;

525

read_next_char ();

526

return;

527

}

528

}

529

}

530

}

531

532

/*---------------------------------------------------------------------------*/

533

534

GLOBAL string_t token_as_text (int_t token)
/* Return <token> as a newly allocated string that is readable for humans.
 * The string remains valid until it is freed by the caller. */
{
  int_t index;
  string_t description;
  char single_char[2];

  /* A keyword is shown as its name in double quotes. */
  for (index = 0; index < NUMBER_OF_KEYWORDS; index++)
  {
    if (keywords[index].code == token)
      return concat_strings ("\"", keywords[index].name, "\"", NULL);
  }

  /* Token classes and multi-char operators have a fixed description. */
  switch (token)
  {
  case EOF:                 description = "end of input"; break;
  case TOK_STRING:          description = "string";       break;
  case TOK_IDENT:           description = "identifier";   break;
  case TOK_VARIABLE:        description = "variable";     break;
  case TOK_CONSTANT:        description = "constant";     break;
  case TOK_NUMBER:          description = "number";       break;
  case TOK_ASSIGN:          description = "\":=\"";       break;
  case TOK_ASSIGN_PLUS:     description = "\":=+\"";      break;
  case TOK_ASSIGN_MINUS:    description = "\":=-\"";      break;
  case TOK_ASSIGN_ASTERISK: description = "\":=*\"";      break;
  case TOK_ASSIGN_SLASH:    description = "\":=/\"";      break;
  case TOK_NOT_EQUAL:       description = "\"/=\"";       break;
  case TOK_NOT_CONGRUENT:   description = "\"/~\"";       break;
  default:                  description = NULL;           break;
  }

  if (description != NULL)
    return new_string (description, NULL);

  /* Any other token is a single char that stands for itself. */
  single_char[0] = token;
  single_char[1] = EOS;
  return new_string_readable (single_char, NULL);
}

572

573

/*---------------------------------------------------------------------------*/

574

575

GLOBAL void test_token (int_t token)
/* Check that <token> is the next token; report an error if it is not.
 * The next token is not consumed. */
{
  if (next_token == token)
    return;

  error ("%s expected, not %s",
         token_as_text (token), token_as_text (next_token));
}

582

583

/*---------------------------------------------------------------------------*/

584

585

GLOBAL void parse_token (int_t token)
/* Check that <token> is the next token (reporting an error if it is not),
 * then consume it by reading the token that follows. */
{
  test_token (token);
  read_next_token ();
}

591

592

/* end of file ==============================================================*/

Older »