1
/* Copyright (C) 1995 Bjoern Beutel. */
3
/* Description. =============================================================*/
5
/* This program takes a lexicon file and compiles it to binary format.
6
* It also includes an interactive allomorph rules debugger. */
8
/* Includes. ================================================================*/
22
#include "rule_type.h"
25
#include "lex_compiler.h"
29
#include "breakpoints.h"
36
/* Variables. ===============================================================*/
38
static string_t allo_format; /* Format of allomorph output. */
40
static bool_t lex_tree_to_output = FALSE;
41
/* Indicates whether the lexicon tree can be printed.
 * The generation functions in this file set it to FALSE before they start
 * and to TRUE once generation has returned; "do_result" complains while it
 * is FALSE. */
43
/* File names. "main" fills them from the command line; "read_project_file"
 * assigns them from the entries of a project file. */
static string_t lexicon_file, rule_file, symbol_file, project_file;
44
static string_t prelex_file;
46
static string_t base_fs_string; /* Last base feature structure. */
48
/* Functions. ===============================================================*/
52
/* Print the name of the current rule. */
57
source_of_instr( executed_rule_sys, pc, &line, &file, &rule );
58
printf( "At \"%s\", line %d, rule \"%s\".",
59
name_in_path( file ), line, rule );
60
if (lex_entry_file_name != NULL)
62
printf( " (\"%s\", line %d)",
63
name_in_path( lex_entry_file_name ), lex_entry_line_number );
66
if (in_emacs_malaga_mode)
67
printf( "SHOW \"%s\":%d:0\n", file, line );
70
/*---------------------------------------------------------------------------*/
73
display_result( void )
74
/* Display result in the modes that have been switched on after analysis.
 * Two outputs are visible below: the lexicon tree is written to
 * DISPLAY_STREAM, framed by an "allomorph" line and an "end" line, and it is
 * printed to stdout with a NULL format.
 * NOTE(review): the condition that selects between the two outputs is not
 * visible in this excerpt -- presumably the display option; confirm. */
78
start_display_process();
79
fprintf( display_stream, "allomorph\n" );
80
print_lex_tree( display_stream, "%n %s {%f}" );
81
fprintf( display_stream, "end\n" );
82
/* Flush so the framed block is handed over to the display stream at once. */
fflush( display_stream );
85
print_lex_tree( stdout, NULL );
88
/*---------------------------------------------------------------------------*/
91
do_result( string_t arguments )
92
/* Show result of last allomorph generation.
 * ARGUMENTS is handed to "parse_end" (presumably it must be empty -- confirm).
 * Complains if "lex_tree_to_output" is FALSE, i.e. no generation has
 * completed yet. */
94
parse_end( &arguments );
95
if (! lex_tree_to_output)
96
complain( "No previous allomorph generation." );
100
/* Command entry that ties the string "result res" (presumably the command
 * name and its abbreviation) to "do_result", followed by the help text. */
static command_t result_command =
102
"result res", do_result,
103
"Show result of last allomorph generation.\n"
107
/*---------------------------------------------------------------------------*/
110
do_read_constants( string_t arguments )
111
/* Read the constants in the lexicon with name on line ARGUMENTS.
 * ARGUMENTS is parsed as a single path; the parsed path is passed to
 * "read_lex_constants" and freed afterwards.
 * "assert_not_in_debug_mode" is called first, so the command is refused in
 * debug mode (see the help text of "read-constants"). */
113
string_t lexicon_name;
115
assert_not_in_debug_mode();
116
lexicon_name = parse_absolute_path( &arguments, NULL );
117
parse_end( &arguments );
118
read_lex_constants( lexicon_name );
119
free_mem( &lexicon_name );
122
/* Command entry that ties "read-constants" to "do_read_constants", followed
 * by the help text shown to the user. */
static command_t read_constants_command =
124
"read-constants", do_read_constants,
125
"Read the constants from the definitions in a lexicon file.\n"
126
"Usage: read-constants LEXICON_FILE\n"
127
"\"read-constants\" can't be used in debug mode.\n"
130
/*---------------------------------------------------------------------------*/
133
do_ga_file( string_t arguments )
134
/* Generate allomorphs of a base lexicon.
135
* Write the allomorphs readably into file.
* ARGUMENTS holds the lexicon file name, optionally followed by the name of
* the output file. The lexicon tree is written to the output file using
* "allo_format"; the statistics go to stdout. */
137
string_t lexicon_name, output_name;
140
assert_not_in_debug_mode();
142
/* Clear the stream and both names before anything is allocated. */
output_stream = NULL;
143
output_name = lexicon_name = NULL;
146
lexicon_name = parse_absolute_path( &arguments, NULL );
147
/* A second argument, if present, names the output file. */
if (*arguments != EOS)
148
output_name = parse_absolute_path( &arguments, NULL );
150
/* Default output name: LEXICON_NAME with ".out" appended.
 * NOTE(review): the line between the two assignments to OUTPUT_NAME is not
 * visible in this excerpt; presumably it is an "else" -- confirm. */
output_name = concat_strings( lexicon_name, ".out", NULL );
151
parse_end( &arguments );
153
set_debug_mode( RUN_MODE, NULL );
154
/* The tree is flagged as not printable while generation is in progress. */
lex_tree_to_output = FALSE;
155
generate_allos_for_file( lexicon_name, NULL, TRUE );
156
lex_tree_to_output = TRUE;
158
output_stream = open_stream( output_name, "w" );
159
print_lex_tree( output_stream, allo_format );
160
print_lex_statistics( stdout );
161
close_stream( &output_stream, output_name );
165
/* NOTE(review): OUTPUT_STREAM is closed a second time, with a NULL name.
 * This call presumably belongs to a cleanup path whose surrounding lines are
 * not visible here -- confirm. Both names are freed after it. */
close_stream( &output_stream, NULL );
166
free_mem( &output_name );
167
free_mem( &lexicon_name );
172
/* Command entry that ties the string "ga-file gaf" (presumably the command
 * name and its abbreviation) to "do_ga_file", followed by the help text. */
static command_t ga_file_command =
174
"ga-file gaf", do_ga_file,
175
"Generate allomorphs from the entries in a lexicon file.\n"
176
"Usage: ga-file LEXICON_FILE [ALLO_FILE]\n"
177
"The results are written to \"ALLO_FILE\".\n"
178
"If ALLO_FILE is missing, they are written to \"LEXICON_FILE.out\".\n"
179
"\"ga-file\" can't be used in debug mode.\n"
182
/*---------------------------------------------------------------------------*/
185
do_debug_ga_file( string_t arguments )
186
/* Generate allomorphs of the base lexicon with name in ARGUMENTS.
187
* Execute rules in debug mode.
* Unlike "do_ga_file", this selects WALK_MODE on "allo_rule_sys" and writes no
* output file; the lexicon tree is only flagged as printable afterwards. */
189
string_t lexicon_name;
191
assert_not_in_debug_mode();
195
lexicon_name = parse_absolute_path( &arguments, NULL );
196
parse_end( &arguments );
197
set_debug_mode( WALK_MODE, allo_rule_sys );
198
/* The tree is flagged as not printable while generation is in progress. */
lex_tree_to_output = FALSE;
199
generate_allos_for_file( lexicon_name, NULL, TRUE );
200
lex_tree_to_output = TRUE;
203
free_mem( &lexicon_name );
207
/* Command entry that ties the string "debug-ga-file dgaf" (presumably the
 * command name and its abbreviation) to "do_debug_ga_file", followed by the
 * help text. */
static command_t debug_ga_file_command =
209
"debug-ga-file dgaf", do_debug_ga_file,
210
"Generate allomorphs from the entries in a lexicon file.\n"
211
"Execute the rules in debug mode.\n"
212
"Usage: debug-ga-file LEXICON_FILE\n"
213
"\"debug-ga-file\" can't be used in debug mode.\n"
216
/*---------------------------------------------------------------------------*/
219
generate_allomorphs_for_line( string_t arguments )
220
/* Generate allomorphs for ARGUMENTS, which should consist
221
* of a file name and a line number.
* Shared by "do_ga_line" and "do_debug_ga_line"; the caller selects the debug
* mode before calling. */
223
string_t lexicon_name;
226
lexicon_name = parse_absolute_path( &arguments, NULL );
227
/* NOTE(review): the declaration of LINE is not visible in this excerpt. */
line = parse_int( &arguments );
228
parse_end( &arguments );
229
/* The tree is flagged as not printable while generation is in progress. */
lex_tree_to_output = FALSE;
230
generate_allos_for_line( lexicon_name, line );
231
lex_tree_to_output = TRUE;
232
free_mem( &lexicon_name );
235
/*---------------------------------------------------------------------------*/
238
do_ga_line( string_t arguments )
239
/* Generate allomorphs for ARGUMENTS, which should consist
240
* of a file name and a line number.
* Selects RUN_MODE and delegates to "generate_allomorphs_for_line".
* Refused in debug mode via "assert_not_in_debug_mode". */
242
assert_not_in_debug_mode();
243
set_debug_mode( RUN_MODE, NULL );
244
generate_allomorphs_for_line( arguments );
248
/* Command entry that ties the string "ga-line gal" (presumably the command
 * name and its abbreviation) to "do_ga_line", followed by the help text. */
static command_t ga_line_command =
250
"ga-line gal", do_ga_line,
251
"Generate allomorphs from a single entry in a file.\n"
252
"Usage: ga-line FILE LINE\n"
253
"The first lexicon entry at or behind LINE in FILE is read in.\n"
254
"\"ga-line\" can't be used in debug mode.\n"
257
/*---------------------------------------------------------------------------*/
260
do_debug_ga_line( string_t arguments )
261
/* Generate an allomorph for ARGUMENTS, which should consist
262
* of a file name and a line number, in debugger mode.
* Selects WALK_MODE on "allo_rule_sys" and delegates to
* "generate_allomorphs_for_line". */
264
assert_not_in_debug_mode();
265
set_debug_mode( WALK_MODE, allo_rule_sys );
266
generate_allomorphs_for_line( arguments );
269
/* Command entry that ties the string "debug-ga-line dgal" (presumably the
 * command name and its abbreviation) to "do_debug_ga_line", followed by the
 * help text. The last help line quotes the command's own name, as the help
 * texts of the other commands in this file do. */
static command_t debug_ga_line_command =
271
"debug-ga-line dgal", do_debug_ga_line,
272
"Generate allomorphs from a single entry in a file.\n"
273
"Execute allomorph rules in debug mode.\n"
274
"Usage: debug-ga-line FILE LINE\n"
275
"The first lexicon entry at or behind LINE in FILE is read in.\n"
276
"Allomorph rule execution stops at the first statement.\n"
277
"\"debug-ga-line\" can't be used in debug mode.\n"
280
/*---------------------------------------------------------------------------*/
283
generate_allomorphs( string_t arguments )
284
/* Generate allomorphs for lexicon entry ARGUMENTS.
 * Shared by "do_ga" and "do_debug_ga"; the caller selects the debug mode
 * before calling. The string that is analysed is kept in "base_fs_string". */
286
/* If no argument given, re-analyze last argument */
287
if (*arguments == EOS)
289
/* Nothing to repeat if no feature structure has been stored yet. */
if (base_fs_string == NULL)
290
complain( "No previous base feature structure." );
294
/* Replace the stored feature structure by a copy of ARGUMENTS.
 * NOTE(review): the lines around this replacement are not visible in this
 * excerpt; presumably it happens only when an argument was given -- confirm. */
free_mem( &base_fs_string );
295
base_fs_string = new_string( arguments, NULL );
297
/* The tree is flagged as not printable while generation is in progress. */
lex_tree_to_output = FALSE;
298
generate_allos_for_string( base_fs_string );
299
lex_tree_to_output = TRUE;
302
/*---------------------------------------------------------------------------*/
305
do_ga( string_t arguments )
306
/* Generate allomorphs for ARGUMENTS.
 * Selects RUN_MODE and delegates to "generate_allomorphs".
 * Refused in debug mode via "assert_not_in_debug_mode". */
308
assert_not_in_debug_mode();
309
set_debug_mode( RUN_MODE, NULL );
310
generate_allomorphs( arguments );
314
/* Command entry for "ga"; the visible lines are its help text.
 * NOTE(review): the line that names the command and its handler is not
 * visible in this excerpt. */
static command_t ga_command =
317
"Generate allomorphs from a feature structure argument.\n"
319
" ga FS -- Generate allomorphs for feature structure FS.\n"
320
" ga -- Re-generate allomorphs for the last argument.\n"
321
"The allomorphs are printed on screen.\n"
322
"\"ga\" can't be used in debug mode.\n"
325
/*---------------------------------------------------------------------------*/
328
do_debug_ga( string_t arguments )
329
/* Generate allomorphs for ARGUMENTS.
330
* Execute allomorph rules in debug mode.
* Selects WALK_MODE on "allo_rule_sys" and delegates to
* "generate_allomorphs". */
332
assert_not_in_debug_mode();
333
set_debug_mode( WALK_MODE, allo_rule_sys );
334
generate_allomorphs( arguments );
337
/* Command entry that ties the string "debug-ga dga ga-debug gad" (presumably
 * the command name and its alternative names) to "do_debug_ga", followed by
 * the help text. */
static command_t debug_ga_command =
339
"debug-ga dga ga-debug gad", do_debug_ga,
340
"Generate allomorphs from the feature structure argument. "
341
"Execute allomorph rules in debug mode.\n"
343
" debug-ga FS -- Generate allomorphs for feature structure FS.\n"
344
" debug-ga -- Re-generate allomorphs for the last argument.\n"
345
"Rule execution stops at the first statement.\n"
346
"The allomorphs are printed on screen.\n"
347
"\"debug-ga\" can't be used in debug mode.\n"
350
/*---------------------------------------------------------------------------*/
353
do_allo_format_option( string_t arguments )
354
/* Change allomorph output line to "arguments".
 * With an empty ARGUMENTS the current format is printed in readable form;
 * otherwise a word parsed from ARGUMENTS replaces "allo_format".
 * NOTE(review): the declaration of FORMAT and the branch structure are not
 * visible in this excerpt. */
358
if (*arguments == EOS)
360
format = new_string_readable( allo_format, NULL );
361
printf( "allo-format: %s\n", format );
366
format = parse_word( &arguments );
367
/* Free the old format before the new string is stored in "allo_format". */
free_mem( &allo_format );
368
allo_format = format;
372
/* Option entry that ties "allo-format" to "do_allo_format_option", followed
 * by the help text listing the %-sequences the format may contain. */
static command_t allo_format_option =
374
"allo-format", do_allo_format_option,
375
"Describe the format in which generated allomorphs will be printed.\n"
376
"Usage: allo-format STRING\n"
377
"STRING may contain the following special sequences:\n"
378
" %f -- Allomorph feature structure.\n"
379
" %n -- Allomorph number.\n"
380
" %s -- Allomorph surface.\n"
383
/* Commands. ================================================================*/
385
/* Table of the option entries of mallex. "main" installs it by assigning it
 * to "options". Only "allo_format_option" is defined in the visible part of
 * this file. */
static command_t *mallex_options[] =
387
&alias_option, &allo_format_option, &auto_variables_option,
388
&display_line_option, &hidden_option, &sort_records_option, &switch_option,
389
&transmit_line_option, &use_display_option, &use_ksc_option,
393
/* Table of the command entries of interactive mallex. "main" passes it to
 * "init_debugger" and to "command_loop". It mixes the commands defined in
 * this file (the "ga" family, "result", "read-constants") with entries that
 * are not defined in the visible part of this file. */
static command_t *mallex_commands[] =
395
&backtrace_command, &break_command, &continue_command, &debug_ga_command,
396
&debug_ga_file_command, &debug_ga_line_command, &delete_command,
397
&down_command, &finish_command, &frame_command, &ga_command,
398
&ga_file_command, &ga_line_command, &get_command, &help_command,
399
&list_command, &next_command, &print_command, &quit_command,
400
&read_constants_command, &result_command, &run_command, &set_command,
401
&step_command, &transmit_command, &up_command, &variables_command,
402
&walk_command, &where_command,
406
/*---------------------------------------------------------------------------*/
409
read_project_file( string_t file_name )
410
/* Read the project file FILE_NAME.
 * Each line is stripped of its comment and its first word is compared,
 * ignoring case, with the keywords "sym:", "lex:", "all:", "prelex:",
 * "include:" and "char-set:" (or "char_set:"). File keywords select one of
 * the file name variables of this module; "include:" reads another project
 * file recursively; "char-set:" sets "char_set".
 * NOTE(review): several lines (the loop, the declaration of NAME_P, the body
 * of the "all:" branch and the error handler) are not visible in this
 * excerpt. */
412
FILE *project_stream;
413
string_t include_file;
414
string_t project_line, project_line_p, argument, extension;
416
volatile bool_t binary = FALSE;
417
volatile int_t line_count;
418
/* Static, so nested calls for "include:" lines share this flag. */
static bool_t err_pos_printed;
420
err_pos_printed = FALSE;
421
project_stream = open_stream( file_name, "r" );
425
project_line = read_line( project_stream );
426
/* A NULL line ends the input (presumably end of file -- confirm). */
if (project_line == NULL)
429
cut_comment( project_line );
430
project_line_p = project_line;
432
/* Only lines that still have content after comment removal are parsed. */
if (*project_line_p != EOS)
437
argument = parse_word( &project_line_p );
440
if (strcmp_no_case( argument, "sym:" ) == 0)
442
name_p = &symbol_file;
446
else if (strcmp_no_case( argument, "lex:" ) == 0)
448
name_p = &lexicon_file;
452
else if (strcmp_no_case( argument, "all:" ) == 0)
458
else if (strcmp_no_case( argument, "prelex:" ) == 0)
460
/* A prelex file may be defined only once; a second one is an error. */
if (prelex_file != NULL)
461
complain( "Prelex file already defined." );
462
name_p = &prelex_file;
463
extension = "prelex";
466
else if (strcmp_no_case( argument, "include:" ) == 0)
468
/* FILE_NAME is passed along with the path (presumably as the base for a
 * relative path -- confirm); the included file is read recursively. */
include_file = parse_absolute_path( &project_line_p, file_name );
469
parse_end( &project_line_p );
470
read_project_file( include_file );
471
free_mem( &include_file );
473
else if (strcmp_no_case( argument, "char-set:" ) == 0
474
|| strcmp_no_case( argument, "char_set:" ) == 0)
476
/* The character set may be defined only once. */
if (char_set != NULL)
477
complain( "Char set already defined." );
478
char_set = parse_word( &project_line_p );
479
parse_end( &project_line_p );
481
free_mem( &argument );
483
/* Store a file name only if a file keyword was seen, the variable it
 * selects is still unset, and the line has a further argument. */
if (name_p != NULL && *name_p == NULL && *project_line_p != EOS)
485
argument = parse_absolute_path( &project_line_p, file_name );
486
if (! has_extension( argument, extension ))
488
complain( "\"%s\" should have extension \"%s\".",
489
name_in_path( argument ), extension );
492
/* NOTE(review): the condition choosing between the two setters is not
 * visible in this excerpt; presumably it tests BINARY -- confirm. */
set_binary_file_name( name_p, argument );
494
set_file_name( name_p, argument );
495
free_mem( &argument );
500
/* The error position is appended to "error_text" only once, guarded by
 * "err_pos_printed". */
if (! err_pos_printed)
502
print_text( error_text, " (\"%s\", line %d)",
503
name_in_path( file_name ), line_count );
504
err_pos_printed = TRUE;
509
free_mem( &project_line );
511
close_stream( &project_stream, file_name );
514
/*---------------------------------------------------------------------------*/
517
main( int argc, char *argv[] )
518
/* The main function of "mallex".
 * Visible steps: handle the version and help arguments, collect file names
 * and the mode switch from the command line, read the project file if one
 * was given, initialise symbols and the lexicon compiler, apply "mallex:"
 * settings, then either run the interactive command loop or compile the
 * lexicon in one of the batch modes, and finally release all resources.
 * NOTE(review): many lines (conditionals, braces, the declaration of I, the
 * mode dispatch) are not visible in this excerpt. */
520
volatile enum {INTERACTIVE_MODE, BINARY_MODE, TEXT_MODE,
521
PRELEX_MODE} mallex_mode;
523
string_t malagarc_path, s;
524
rule_sys_name_t rule_systems[1]; /* Rule system for debugger. */
525
string_t object_file = NULL; /* Object file for binary and prelex mode. */
527
/* Interactive mode is the default; a mode switch below overrides it. */
mallex_mode = INTERACTIVE_MODE;
528
init_basic( "mallex" );
531
/* Parse arguments. */
534
/* NOTE(review): argv[1] is read here; whether the argument count is checked
 * first is not visible in this excerpt. */
if (strcmp_no_case( argv[1], "--version" ) == 0
535
|| strcmp_no_case( argv[1], "-version" ) == 0
536
|| strcmp_no_case( argv[1], "-v" ) == 0)
541
else if (strcmp_no_case( argv[1], "--help" ) == 0
542
|| strcmp_no_case( argv[1], "-help" ) == 0
543
|| strcmp_no_case( argv[1], "-h" ) == 0)
545
printf( "Apply the allomorph rules on the entries of a Malaga lexicon.\n"
549
"-- Start interactive mallex.\n"
550
"mallex GRAMMAR -b[inary] "
551
"-- Create binary allomorph lexicon.\n"
552
"mallex GRAMMAR -r[eadable] "
553
"-- Output readable allomorph lexicon.\n"
554
"mallex GRAMMAR -p[relex] "
555
"-- Output precompiled lexicon.\n"
557
"-- Print version information.\n"
559
"-- Print this help.\n\n"
560
"GRAMMAR may be \"PROJECT_FILE\" "
561
"or \"SYM_FILE ALLO_FILE LEX_FILE [PRELEX_FILE]\".\n"
562
"PROJECT_FILE must end on \".pro\".\n"
563
"SYM_FILE must end on \".sym\".\n"
564
"ALLO_FILE must end on \".all\".\n"
565
"LEX_FILE must end on \".lex\".\n"
566
"PRELEX_FILE must end on \".prelex\".\n" );
570
/* Classify each argument by its file extension or as a mode switch. */
for (i = 1; i < argc; i++)
572
if (has_extension( argv[i], "pro" ))
573
set_file_name( &project_file, argv[i] );
574
else if (has_extension( argv[i], "lex" ))
575
set_file_name( &lexicon_file, argv[i] );
576
else if (has_extension( argv[i], "all" ))
577
set_binary_file_name( &rule_file, argv[i] );
578
else if (has_extension( argv[i], "sym" ))
579
set_binary_file_name( &symbol_file, argv[i] );
580
else if (has_extension( argv[i], "prelex") )
581
set_binary_file_name( &prelex_file, argv[i] );
582
else if (strcmp_no_case( argv[i], "-binary" ) == 0
583
|| strcmp_no_case( argv[i], "-b" ) == 0)
585
mallex_mode = BINARY_MODE;
587
else if (strcmp_no_case( argv[i], "-readable" ) == 0
588
|| strcmp_no_case( argv[i], "-r" ) == 0)
590
mallex_mode = TEXT_MODE;
592
else if (strcmp_no_case( argv[i], "-prelex" ) == 0
593
|| strcmp_no_case( argv[i], "-p" ) == 0)
595
mallex_mode = PRELEX_MODE;
598
complain( "Illegal argument \"%s\".", argv[i] );
600
/* The project file is read after the command line has been processed;
 * "read_project_file" only fills file names that are still unset. */
if (project_file != NULL)
601
read_project_file( project_file );
602
/* Fall back to "iso8859-1" if no character set has been defined. */
if (char_set == NULL)
603
char_set = new_string( "iso8859-1", NULL );
604
/* The rule file and the symbol file are mandatory in every mode. */
if (rule_file == NULL)
605
complain( "Missing allomorph rule file name." );
606
if (symbol_file == NULL)
607
complain( "Missing symbol file name." );
611
init_symbols( symbol_file );
614
init_lex_compiler( rule_file );
617
/* Set mallex options to default values. */
618
options = mallex_options;
619
allo_format = new_string( "%s: %f", NULL );
622
/* Set mallex options by user scripts. */
623
if (project_file != NULL)
624
execute_set_commands( project_file, "mallex:" );
625
malagarc_path = NULL;
628
/* NOTE(review): two alternative locations of the user settings file follow;
 * presumably they are selected by platform conditionals that are not visible
 * in this excerpt -- confirm. */
malagarc_path = absolute_path( "~/.malagarc", NULL );
635
malagarc_path = absolute_path( "~\\malaga.ini", NULL );
640
if (malagarc_path != NULL && file_exists( malagarc_path ))
641
execute_set_commands( malagarc_path, "mallex:" );
642
free_mem( &malagarc_path );
644
if (mallex_mode == INTERACTIVE_MODE)
646
/* Interactive mode: set up the debugger and breakpoints for the allomorph
 * rule system, run the command loop, then shut both down again. */
init_debugger( display_where, mallex_commands );
647
rule_systems[0].rule_sys = allo_rule_sys;
648
rule_systems[0].name = "all";
649
init_breakpoints( 1, rule_systems );
651
command_loop( program_name, mallex_commands );
652
terminate_breakpoints();
653
terminate_debugger();
657
/* A lexicon file is required for the statements that follow, which compile
 * it. NOTE(review): the dispatch on "mallex_mode" that separates the three
 * groups below is not visible in this excerpt. */
if (lexicon_file == NULL)
658
complain( "missing lexicon file name" );
662
/* Generate from the lexicon file and print the tree to stdout using
 * "allo_format". */
generate_allos_for_file( lexicon_file, NULL, TRUE );
663
print_lex_tree( stdout, allo_format );
666
/* Generate with the prelex file and write the lexicon tree to an object
 * file whose name is derived from the lexicon file name. */
generate_allos_for_file( lexicon_file, prelex_file, TRUE );
667
set_binary_file_name( &object_file, lexicon_file );
668
write_lex_tree( object_file );
669
free_mem( &object_file );
672
/* Generate with FALSE as last argument and write a prelex file whose name
 * is the lexicon file name with the extension replaced by "prelex". */
generate_allos_for_file( lexicon_file, prelex_file, FALSE );
673
s = replace_extension( lexicon_file, "prelex" );
674
set_binary_file_name( &object_file, s );
676
write_prelex_file( object_file );
677
free_mem( &object_file );
680
complain( "Internal error." );
682
print_lex_statistics( stderr );
686
/* Release the module's strings and shut the subsystems down. */
free_mem( &base_fs_string );
687
free_mem( &allo_format );
688
stop_display_process();
689
terminate_lex_compiler();
692
terminate_transmit();
695
terminate_patterns();
697
free_mem( &rule_file );
698
free_mem( &symbol_file );
699
free_mem( &lexicon_file );
700
free_mem( &project_file );
701
free_mem( &char_set );
707
/* End of file. =============================================================*/