1
/* This file is part of Malaga, a system for Natural Language Analysis.
2
* Copyright (C) 1995-1999 Bjoern Beutel
5
* Universitaet Erlangen-Nuernberg
6
* Abteilung fuer Computerlinguistik
9
* e-mail: malaga@linguistik.uni-erlangen.de
11
* This program is free software; you can redistribute it and/or modify
12
* it under the terms of the GNU General Public License as published by
13
* the Free Software Foundation; either version 2 of the License, or
14
* (at your option) any later version.
16
* This program is distributed in the hope that it will be useful,
17
* but WITHOUT ANY WARRANTY; without even the implied warranty of
18
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
* GNU General Public License for more details.
21
* You should have received a copy of the GNU General Public License
22
* along with this program; if not, write to the Free Software
23
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
25
/* description ==============================================================*/
27
/* This file contains data structures and functions used for grammatical
* analysis. */
30
/* includes =================================================================*/
38
#include "rule_type.h"
47
/* types ====================================================================*/
49
typedef struct TREE_NODE_T /* a rule application is stored in "tree_node" */
51
struct TREE_NODE_T *mother; /* predecessor of this tree node */
52
struct TREE_NODE_T *first_daughter; /* first successor of this tree node */
53
struct TREE_NODE_T *sister; /* alternative tree node */
54
tree_node_type_t type; /* type of this tree node */
55
int_t rule; /* number of the executed rule */
56
int_t index; /* index of this tree node */
57
value_t right_cat; /* category of segment being added */
58
value_t result_cat; /* result cat of rule application */
59
int_t rule_set; /* successor rules (-1 for end state) */
60
string_t input; /* the input that is not yet analysed */
63
typedef struct STATE_T /* a state in morphological or syntactical analysis */
65
struct STATE_T *next; /* link to state with same or higher <input> */
66
value_t cat; /* category of input read so far */
67
string_t input; /* pointer to input that is analysed next */
68
int_t rule_set; /* set of rules to be applied */
69
tree_node_t *tree_node; /* tree node of rule application that created
70
* this state (NULL if no tree) */
71
int_t item_index; /* number of items read in so far */
74
typedef struct /* the structure for morphological and syntactical analysis */
76
pool_t state_pool; /* all states are saved in <state_pool> */
77
pool_t value_pool; /* all categories are saved in <value_pool> */
79
/* States are chained through their <next> attribute. There are three chains: */
80
state_t *running_states; /* states that need further analysis
81
* (in the order of their <input> indexes) */
82
state_t *end_states; /* end states */
83
state_t *free_states; /* states that can be reused */
86
/* variables ================================================================*/
88
/* structures used for LAG analysis, one for morphology and one for syntax. */
89
LOCAL analysis_t *analyses[2];
91
/* the data structure used to save the analysis tree */
92
LOCAL tree_node_t *root_tree_node; /* a pointer to the root tree node */
93
LOCAL pool_t tree_pool; /* pool where tree nodes are stored */
94
LOCAL int_t number_of_tree_nodes;
96
LOCAL state_t *next_result_state; /* needed for "get_next_analysis_result" */
97
LOCAL tree_node_t *next_tree_node; /* needed for "get_next_analysis_node" */
99
LOCAL string_t left_start, right_start, right_end;
100
/* Start and end positions of the surface segments when a rule is executed. READ ONLY! */
102
/* information needed to generate states and tree nodes */
104
analysis_t *analysis;
105
bool_t build_tree; /* entering tree nodes? */
106
bool_t analyse_all; /* analyse the whole input? */
107
int_t rule; /* rule just executed */
108
value_t right_cat; /* right category */
109
tree_node_t *mother; /* predecessor tree node */
110
int_t item_index; /* index of item that is added */
111
string_t input; /* end of analysed input */
114
LOCAL bool_t options[NUM_ANALYSIS_OPTIONS];
116
/* functions for analysis options ===========================================*/
118
GLOBAL void set_analysis_option (analysis_option_t selected, bool_t setting)
119
/* Set analysis option <selected> to <setting>. */
124
if (rule_system[MORPHOLOGY]->robust_rule == -1)
125
error ("no morphology \"robust_rule\"");
128
if (rule_system[SYNTAX] == NULL
129
|| rule_system[SYNTAX]->pruning_rule == -1)
130
error ("no morphology \"pruning_rule\"");
132
case MOR_OUT_FILTER_OPTION:
133
if (rule_system[MORPHOLOGY]->output_filter == -1)
134
error ("no morphology \"output_filter\"");
136
case SYN_OUT_FILTER_OPTION:
137
if (rule_system[SYNTAX] == NULL
138
|| rule_system[SYNTAX]->output_filter == -1)
139
error ("no syntax \"output_filter\"");
141
case SYN_IN_FILTER_OPTION:
142
if (rule_system[SYNTAX] == NULL
143
|| rule_system[SYNTAX]->input_filter == -1)
144
error ("no syntax \"input_filter\"");
149
error ("internal (unknown option)");
151
options[selected] = setting;
154
/*---------------------------------------------------------------------------*/
156
GLOBAL bool_t get_analysis_option (analysis_option_t selected)
157
/* Return the current setting of analysis option <selected>. */
159
return options[selected];
162
/* functions for segmentation and preprocessing =============================*/
164
LOCAL bool_t is_word_part (string_t string)
165
/* Return whether the character *<string> may be part of a word. */
167
/* *<string> is part of a word if it is a letter, a digit
168
* or one of {".", ",", "-"} followed by a letter or a digit. */
169
return (IS_ALPHA (*string) || isdigit (*string)
170
|| ((*string == ',' || *string == '.' || *string == '-')
171
&& (IS_ALPHA (string[1]) || isdigit (string[1]))));
174
/*---------------------------------------------------------------------------*/
176
GLOBAL void preprocess_input (string_t input)
177
/* Delete leading and trailing spaces in <input>
178
* and compress all whitespace sequences to a single space. */
180
string_t input_ptr, output_ptr;
184
/* Cut leading spaces. */
185
input_ptr = next_non_space (input);
186
while (*input_ptr != EOS)
188
if (isspace (*input_ptr))
190
/* Skip over all whitespace and write a single space. */
191
input_ptr = next_non_space (input_ptr);
195
*output_ptr++ = *input_ptr++;
198
/* Cut trailing spaces. */
199
while (output_ptr > input && isspace (output_ptr[-1]))
204
/* functions for state list processing ======================================*/
206
LOCAL void remove_state (analysis_t *analysis, state_t **state_list_ptr)
207
/* Remove the state *<state_list_ptr> points to and enter it into the free list. */
209
state_t *state = *state_list_ptr;
213
/* Unlink from old list. */
214
*state_list_ptr = state->next;
216
/* Enter in free list. */
217
state->next = analysis->free_states;
218
analysis->free_states = state;
222
/*---------------------------------------------------------------------------*/
224
LOCAL state_t *insert_state (analysis_t *analysis,
225
state_t **state_list_ptr,
230
/* Insert a state, composed of <cat>, <input>, <rule_set>, and <item_index>
231
* in the list *<state_list_ptr> points to, in front of all states with a
232
* higher <input> index. Return this state. */
236
if (analysis->free_states != NULL)
238
/* Get first state in the free list. */
239
new_state = analysis->free_states;
240
analysis->free_states = new_state->next;
243
new_state = (state_t *) get_pool_space (analysis->state_pool, 1, NULL);
246
new_state->cat = cat;
247
new_state->input = input;
248
new_state->rule_set = rule_set;
249
new_state->item_index = item_index;
250
new_state->tree_node = NULL;
252
/* Insert new state in list. */
253
while (*state_list_ptr != NULL && (*state_list_ptr)->input <= input)
254
state_list_ptr = &(*state_list_ptr)->next;
255
new_state->next = *state_list_ptr;
256
*state_list_ptr = new_state;
261
/*---------------------------------------------------------------------------*/
263
LOCAL tree_node_t *add_tree_node (value_t result_cat,
266
tree_node_type_t type)
267
/* Add a tree node for a rule that fired with <result_cat> and <rule_set>,
268
* where <input> is yet to be analysed. */
270
tree_node_t **tree_node_ptr;
271
tree_node_t *tree_node;
273
/* Get a new tree node. */
274
tree_node = (tree_node_t *) get_pool_space (tree_pool, 1, NULL);
276
tree_node->mother = state_info.mother;
277
tree_node->first_daughter = NULL;
278
tree_node->sister = NULL;
279
tree_node->type = type;
280
tree_node->rule = state_info.rule;
281
tree_node->index = number_of_tree_nodes;
282
tree_node->right_cat = state_info.right_cat;
283
tree_node->result_cat = result_cat;
284
tree_node->rule_set = rule_set;
285
tree_node->input = input;
287
/* Link the tree node into the tree structure. */
288
tree_node_ptr = &state_info.mother->first_daughter;
289
while (*tree_node_ptr != NULL)
290
tree_node_ptr = &(*tree_node_ptr)->sister;
291
*tree_node_ptr = tree_node;
293
/* Increment the number of tree nodes. */
294
number_of_tree_nodes++;
299
/*---------------------------------------------------------------------------*/
301
LOCAL void add_state (state_t **list,
304
tree_node_type_t type)
305
/* Add a state, consisting of <cat> and <rule_set>, to the list *<list> points to.
306
* When <state_info.build_tree> == TRUE, also generate a tree node. */
311
/* Preserve the category. */
312
new_value = copy_value_to_pool (state_info.analysis->value_pool, cat, NULL);
314
state = insert_state (state_info.analysis, list, new_value, state_info.input,
315
rule_set, state_info.item_index);
316
if (state_info.build_tree)
317
state->tree_node = add_tree_node (new_value, state_info.input, rule_set,
321
/* callback functions needed by rules =======================================*/
323
LOCAL void local_add_end_state (value_t cat)
324
/* Add a state, consisting of <cat>, as an end state. */
326
string_t input = state_info.input;
328
/* Only add an end state if at the end of input
329
* or at the end of a word in a subordinate morphological analysis. */
330
if (*input == EOS || ! (state_info.analyse_all
331
|| (is_word_part (input-1) && is_word_part (input))))
332
add_state (&state_info.analysis->end_states, cat, -1, FINAL_NODE);
335
/*---------------------------------------------------------------------------*/
337
LOCAL void local_add_running_state (value_t cat, int_t rule_set)
338
/* Add a running state, consisting of <cat> and <rule_set>. */
340
add_state (&state_info.analysis->running_states, cat, rule_set, INTER_NODE);
343
/*---------------------------------------------------------------------------*/
345
LOCAL string_t local_get_surface (surface_t surface_type)
346
/* Return surface <surface_type> for currently executed rule.
347
* The result must be freed after use. */
351
if (right_start > left_start && right_start[-1] == ' ')
352
left_end = right_start - 1;
354
left_end = right_start;
356
switch (surface_type)
359
return new_string_readable (left_start, left_end);
361
return new_string_readable (right_start, right_end);
363
return new_string_readable (left_start, right_end);
365
error ("internal (unknown surface type)");
369
/* analysis functions =======================================================*/
371
LOCAL analysis_t *new_analysis (void)
372
/* Create a new analysis structure. */
374
analysis_t *analysis = new_mem (sizeof (analysis_t));
376
analysis->state_pool = new_pool (sizeof (state_t));
377
analysis->value_pool = new_pool (sizeof (cell_t));
378
analysis->running_states = NULL;
379
analysis->end_states = NULL;
380
analysis->free_states = NULL;
385
/*---------------------------------------------------------------------------*/
387
LOCAL void free_analysis (analysis_t **analysis)
388
/* Destroy an analysis structure. */
390
if (*analysis != NULL)
392
free_pool (&(*analysis)->state_pool);
393
free_pool (&(*analysis)->value_pool);
398
/*---------------------------------------------------------------------------*/
400
GLOBAL void init_analysis (string_t morphology_file, string_t syntax_file)
401
/* Initialise the analysis module.
402
* <morphology_file> and <syntax_file> are the rule files to load.
403
* <syntax_file> may be NULL. */
407
/* Read rule files. */
408
rule_system[MORPHOLOGY] = read_rule_sys (morphology_file);
409
if (syntax_file != NULL)
410
rule_system[SYNTAX] = read_rule_sys (syntax_file);
412
/* Init analysis structure. */
413
analyses[MORPHOLOGY] = new_analysis ();
414
analyses[SYNTAX] = new_analysis ();
415
tree_pool = new_pool (sizeof (tree_node_t));
417
/* Set analysis options to start values. */
418
for (i = 0; i < NUM_ANALYSIS_OPTIONS; i++)
420
options[MOR_OUT_FILTER_OPTION] =
421
(rule_system[MORPHOLOGY]->output_filter != -1);
422
options[SYN_IN_FILTER_OPTION] =
423
(rule_system[SYNTAX] != NULL && rule_system[SYNTAX]->input_filter != -1);
424
options[SYN_OUT_FILTER_OPTION] =
425
(rule_system[SYNTAX] != NULL && rule_system[SYNTAX]->output_filter != -1);
428
/*---------------------------------------------------------------------------*/
430
GLOBAL void terminate_analysis (void)
431
/* Terminate the analysis module. */
433
free_rule_sys (&rule_system[SYNTAX]);
434
free_rule_sys (&rule_system[MORPHOLOGY]);
435
free_analysis (&analyses[MORPHOLOGY]);
436
free_analysis (&analyses[SYNTAX]);
437
free_pool (&tree_pool);
442
/*---------------------------------------------------------------------------*/
444
GLOBAL bool_t reset_analysis_results (void)
445
/* Restart reading the analysis results.
446
* Return TRUE iff there are any analysis results. */
448
next_result_state = analyses[top_grammar]->end_states;
449
return next_result_state != NULL;
452
/*---------------------------------------------------------------------------*/
454
GLOBAL value_t get_next_analysis_result (void)
455
/* Return the category of the next analysis result. */
457
if (next_result_state != NULL)
459
value_t result = next_result_state->cat;
461
next_result_state = next_result_state->next;
468
/*---------------------------------------------------------------------------*/
470
GLOBAL bool_t reset_analysis_nodes (void)
471
/* Restart reading the analysis nodes.
472
* Return TRUE iff there are any analysis nodes. */
474
next_tree_node = root_tree_node;
475
return next_tree_node != NULL;
478
/*---------------------------------------------------------------------------*/
480
GLOBAL void free_analysis_node (analysis_node_t **node)
481
/* Free the memory occupied by <node>. */
485
free_mem (&(*node)->right_surf);
486
free_mem (&(*node)->result_surf);
487
free_mem (&(*node)->rule_set);
492
/*---------------------------------------------------------------------------*/
494
GLOBAL analysis_node_t *get_next_analysis_node (void)
495
/* Return the next analysis tree node of the last call of "analyse_item".
496
* Return NULL if there are no more nodes.
497
* The node must be freed with "free_analysis_node" after use. */
499
analysis_node_t *node;
500
string_t right_start;
501
rule_sys_t *rule_sys = rule_system[top_grammar];
503
if (next_tree_node == NULL)
506
node = new_mem (sizeof (analysis_node_t));
508
/* Set node index. */
509
node->index = next_tree_node->index;
512
node->type = next_tree_node->type;
514
/* Set mother index. */
515
if (next_tree_node->mother == NULL)
516
node->mother_index = -1;
518
node->mother_index = next_tree_node->mother->index;
521
if (next_tree_node->rule != -1)
522
node->rule_name = (rule_sys->strings
523
+ rule_sys->rules[next_tree_node->rule].name);
525
node->rule_name = NULL;
527
/* Set right surface and category. */
528
if (next_tree_node->mother == NULL) /* no predecessor */
529
right_start = last_analysis_input;
531
right_start = next_non_space (next_tree_node->mother->input);
532
if (right_start != next_tree_node->input)
533
node->right_surf = new_string (right_start, next_tree_node->input);
534
node->right_cat = next_tree_node->right_cat;
536
/* Set result surface. */
537
node->result_surf = new_string (last_analysis_input, next_tree_node->input);
538
node->result_cat = next_tree_node->result_cat;
541
if (next_tree_node->result_cat != NULL)
542
node->rule_set = rule_set_readable (rule_sys, next_tree_node->rule_set);
544
/* Update <next_tree_node>. */
545
if (next_tree_node->first_daughter != NULL)
546
next_tree_node = next_tree_node->first_daughter;
547
else if (next_tree_node->sister != NULL)
548
next_tree_node = next_tree_node->sister;
549
else /* Go back to the next node not yet visited. */
553
next_tree_node = next_tree_node->mother;
554
if (next_tree_node == NULL)
557
if (next_tree_node->sister != NULL)
559
next_tree_node = next_tree_node->sister;
568
/*---------------------------------------------------------------------------*/
570
LOCAL string_t get_word_end (string_t input, bool_t analyse_all)
571
/* Return the end of the word that starts at <input>. */
576
input_end = input + strlen (input);
577
else if (! is_word_part (input))
578
input_end = input + 1;
581
input_end = input + 1;
582
while (is_word_part (input_end))
589
/*---------------------------------------------------------------------------*/
591
LOCAL bool_t get_from_cache (analysis_t *analysis, string_t input,
593
/* If next word at <input> is in analysis cache,
594
* enter its results in <analysis>, and return TRUE. Otherwise return FALSE. */
596
string_t input_end = get_word_end (input, analyse_all);
598
if (word_in_cache (input, input_end))
604
result = next_result_in_cache ();
608
insert_state (analysis, &analysis->end_states, result, input_end, -1, 0);
616
/*---------------------------------------------------------------------------*/
618
LOCAL void put_into_cache (analysis_t *analysis, string_t input,
620
/* Store the results in <analysis> for the word form that starts at <input>
* in the analysis cache. */
626
string_t input_end = get_word_end (input, analyse_all);
628
/* Count categories. */
630
for (state = analysis->end_states; state != NULL; state = state->next)
632
if (state->input != input_end)
633
/* Only put into cache if all entries will be found in cache. */
638
/* Allocate a new vector which takes the categories. */
639
cats = new_vector (sizeof (value_t), num_cats);
640
for (i = 0, state = analysis->end_states;
641
state != NULL && state->input == input_end;
642
i++, state = state->next)
643
cats[i] = new_value (state->cat);
645
enter_in_cache (input, input_end, num_cats, cats);
648
/*---------------------------------------------------------------------------*/
650
LOCAL void execute_robust_rule (analysis_t *analysis,
651
rule_sys_t *rule_sys,
654
/* Execute robust_rule in <rule_sys> for the next word in <analysis>.
655
* Word starts at <input>.
656
* Word must extend until end of string iff <analyse_all> == TRUE. */
660
input_end = get_word_end (input, analyse_all);
662
/* Set debugging information. */
665
right_end = input_end;
667
/* Setup <state_info>. */
668
state_info.analysis = analysis;
669
state_info.build_tree = FALSE;
670
state_info.analyse_all = analyse_all;
671
state_info.item_index = 1;
672
state_info.input = input_end;
675
push_string_value (input, input_end);
676
execute_rule (rule_sys, rule_sys->robust_rule);
677
if (analysis->end_states != NULL && analyse_all)
678
recognised_by_robust_rule = TRUE;
681
/*---------------------------------------------------------------------------*/
683
LOCAL void execute_filter_rule (analysis_t *analysis,
684
rule_sys_t *rule_sys,
687
/* Execute <filter_rule> in <rule_sys> for <analysis>. */
689
state_t *old_end_states = analysis->end_states;
691
/* Go through all results with the same length. */
692
old_end_states = analysis->end_states;
693
analysis->end_states = NULL;
694
while (old_end_states != NULL)
698
string_t input = old_end_states->input;
700
/* Count number of results. */
702
for (state = old_end_states;
703
state != NULL && state->input == input;
707
/* Create a list with the results of all states and remove states. */
709
for (i = 0; i < results; i++)
711
push_value (old_end_states->cat);
712
remove_state (analysis, &old_end_states);
714
build_list (results);
716
/* Set debugging information. */
717
right_start = right_end = input;
719
state_info.analysis = analysis;
720
state_info.build_tree = FALSE;
721
state_info.analyse_all = analyse_all;
722
state_info.item_index = 0;
723
state_info.input = input;
724
execute_rule (rule_sys, filter_rule);
729
/*---------------------------------------------------------------------------*/
731
LOCAL void execute_pruning_rule (analysis_t *analysis, grammar_t grammar)
732
/* Execute pruning_rule in <grammar> for the running states in <analysis>. */
737
string_t input = analysis->running_states->input;
738
rule_sys_t *rule_sys = rule_system[grammar];
742
/* Create a list that contains the results. */
745
for (state = analysis->running_states;
746
state != NULL && state->input == input;
750
push_value (state->cat);
752
build_list (results);
754
/* Set debugging information. */
755
right_start = right_end = input;
757
execute_rule (rule_sys, rule_sys->pruning_rule);
759
list = value_stack[top-1];
760
if (get_value_type (list) != LIST_SYMBOL)
761
error ("pruning rule result must be a list");
762
if (get_list_length (list) != results)
763
error ("pruning rule result must have as much elements as rule argument");
765
state_ptr = &analysis->running_states;
766
for (i = 0; i < results; i++)
768
symbol = value_to_symbol (get_element (list, i + 1));
769
if (symbol == NO_SYMBOL)
771
if ((*state_ptr)->tree_node != NULL)
772
(*state_ptr)->tree_node->type = PRUNED_NODE;
773
remove_state (analysis, state_ptr);
775
else if (symbol == YES_SYMBOL)
776
state_ptr = &(*state_ptr)->next;
778
error ("pruning rule result list may only contain yes/no symbols");
782
/*---------------------------------------------------------------------------*/
784
LOCAL void execute_rules (analysis_t *analysis,
785
rule_sys_t *rule_sys,
788
string_t right_surf_start,
789
string_t right_surf_end,
791
rule_type_t rule_type,
793
/* Execute the successor rules in <rule_sys> for <state> in <analysis>.
794
* Consume the segment from <right_surf_start> to <right_surf_end>
795
* with category <right_cat>. Enter a tree node if <build_tree> == TRUE. */
798
bool_t rules_successful;
800
/* Setup <state_info>. */
801
state_info.analysis = analysis;
802
state_info.build_tree = build_tree;
803
state_info.analyse_all = analyse_all;
804
state_info.right_cat = right_cat;
805
state_info.mother = state->tree_node;
806
state_info.item_index = state->item_index + 1;
807
state_info.input = right_surf_end;
809
/* Set debugging information. */
810
right_start = right_surf_start;
811
right_end = right_surf_end;
813
/* Check if we are now executing the rules for a state to be debugged. */
814
if (debug_state != NULL && state->tree_node != NULL)
815
debug_state (state->tree_node->index);
817
rules_successful = FALSE;
818
for (rule_ptr = rule_sys->rule_sets + state->rule_set;
824
if (rule_type == END_RULE || rules_successful)
827
else if (rule_sys->rules[*rule_ptr].type == rule_type)
829
state_info.rule = *rule_ptr;
831
push_value (state->cat);
832
push_value (right_cat);
833
push_string_value (right_surf_start, right_surf_end);
834
push_number_value (state_info.item_index);
835
execute_rule (rule_sys, *rule_ptr);
836
rules_successful |= rule_successful;
840
if (build_tree && rule_type != END_RULE)
842
/* Enter a tree node for a rule set that did not fire. */
843
state_info.rule = -1;
844
add_tree_node (NULL, right_surf_end, -1, BREAK_NODE);
848
/*---------------------------------------------------------------------------*/
850
GLOBAL void analyse (grammar_t grammar,
854
/* Perform a LAG analysis of <input> using <grammar> (MORPHOLOGY or SYNTAX).
855
* An analysis tree will be built if <build_tree> == TRUE.
856
* The whole input will be analysed if <analyse_all> == TRUE. */
858
rule_sys_t *rule_sys;
859
state_t *initial_state;
860
analysis_t *analysis;
864
top_grammar = grammar;
865
root_tree_node = NULL;
866
last_analysis_input = input;
867
recognised_by_robust_rule = recognised_by_combi_rules = FALSE;
870
analysis = analyses[grammar];
871
rule_sys = rule_system[grammar];
872
if (rule_sys == NULL)
873
error ("missing rule system");
875
/* Set callback functions for <execute_rules>. */
876
add_running_state = local_add_running_state;
877
add_end_state = local_add_end_state;
879
/* Reset the analysis, we start anew. */
880
analysis->running_states = NULL;
881
analysis->end_states = NULL;
882
analysis->free_states = NULL;
883
clear_pool (analysis->state_pool);
884
clear_pool (analysis->value_pool);
886
/* Set debug information. */
887
get_surface = local_get_surface;
890
if (grammar == MORPHOLOGY && options[CACHE_OPTION] && ! build_tree)
892
if (get_from_cache (analysis, input, analyse_all))
896
/* Enter the initial state. */
897
DB_ASSERT (rule_sys->initial_cat < rule_sys->values_size);
898
initial_state = insert_state (analysis, &analysis->running_states,
900
+ rule_sys->initial_cat,
901
input, rule_sys->initial_rule_set, 0);
905
/* Clear all tree nodes and setup <root_tree_node>. */
906
clear_pool (tree_pool);
907
root_tree_node = (tree_node_t *) get_pool_space (tree_pool, 1, NULL);
908
root_tree_node->mother = NULL;
909
root_tree_node->first_daughter = NULL;
910
root_tree_node->sister = NULL;
911
root_tree_node->type = INTER_NODE;
912
root_tree_node->rule = -1;
913
root_tree_node->index = 0;
914
root_tree_node->right_cat = NULL;
915
root_tree_node->result_cat = rule_sys->values + rule_sys->initial_cat;
916
root_tree_node->rule_set = rule_sys->initial_rule_set;
917
root_tree_node->input = input;
918
initial_state->tree_node = root_tree_node;
919
number_of_tree_nodes = 1;
922
/* Analyse while there are running states. */
923
while (analysis->running_states != NULL)
926
string_t current_input = analysis->running_states->input;
928
if (options[PRUNING_OPTION] && current_input > input
929
&& rule_sys->pruning_rule != -1)
930
execute_pruning_rule (analysis, grammar);
932
/* Apply end_rules only if all input has been parsed
933
* or if in subordinate analysis. */
934
if (current_input > input
935
&& (*current_input == EOS
936
|| ! (analyse_all || (is_word_part (current_input-1)
937
&& is_word_part (current_input)))))
939
/* Apply all end_rules to states at <current_input>. */
940
for (state = analysis->running_states;
941
state != NULL && state->input == current_input;
943
execute_rules (analysis, rule_sys, state, NULL, current_input,
944
current_input, build_tree, END_RULE, analyse_all);
947
/* If analysis has consumed all input, leave. */
948
if (*current_input == EOS)
951
if (grammar == MORPHOLOGY)
953
string_t right_surf_end; /* end of surface of the next allomorph */
956
/* Look for prefixes of increasing length
957
* that match the string at <current_input>. */
958
search_for_prefix (current_input);
959
while (get_next_prefix (&right_surf_end, &cat))
961
/* Apply the category of this prefix to all morphological states. */
962
for (state = analysis->running_states;
963
state != NULL && state->input == current_input;
965
execute_rules (analysis, rule_sys, state, cat, current_input,
966
right_surf_end, build_tree, COMBI_RULE, analyse_all);
969
else /* <grammar> == SYNTAX */
971
state_t *morph_result;
972
string_t input_behind_space = next_non_space (current_input);
974
/* Call morphological analysis to get right-categories. */
975
analyse (MORPHOLOGY, input_behind_space, FALSE, FALSE);
977
/* Execution of morphology rules has changed <left_start>. */
980
/* Step through all morphological results. */
981
for (morph_result = analyses[MORPHOLOGY]->end_states;
982
morph_result != NULL;
983
morph_result = morph_result->next)
985
/* The morphology pool may be cleared,
986
* so copy <cat> to the syntax pool. */
987
value_t right_cat = copy_value_to_pool (analysis->value_pool,
988
morph_result->cat, NULL);
990
/* Apply this right category to all syntactic states. */
991
for (state = analysis->running_states;
992
state != NULL && state->input == current_input;
994
execute_rules (analysis, rule_sys, state, right_cat,
995
input_behind_space, morph_result->input,
996
build_tree, COMBI_RULE, TRUE);
1000
/* We have combined all analyses at <current_input> with all states
1001
* that were at <current_input>, so we can kill these states. */
1002
while (analysis->running_states != NULL
1003
&& analysis->running_states->input == current_input)
1004
remove_state (analysis, &analysis->running_states);
1005
} /* end of loop that consumes all running states */
1007
if (analysis->end_states != NULL)
1008
recognised_by_combi_rules = TRUE;
1010
if (grammar == MORPHOLOGY)
1012
if (analysis->end_states == NULL && options[ROBUST_OPTION])
1013
execute_robust_rule (analysis, rule_sys, input, analyse_all);
1015
if (options[MOR_OUT_FILTER_OPTION])
1016
execute_filter_rule (analysis, rule_system[MORPHOLOGY],
1017
rule_system[MORPHOLOGY]->output_filter,
1020
if (options[SYN_IN_FILTER_OPTION])
1021
execute_filter_rule (analysis, rule_system[SYNTAX],
1022
rule_system[SYNTAX]->input_filter, analyse_all);
1024
if (options[CACHE_OPTION] && ! build_tree)
1025
put_into_cache (analysis, input, analyse_all);
1027
else /* grammar == SYNTAX */
1029
if (options[SYN_OUT_FILTER_OPTION])
1030
execute_filter_rule (analysis, rule_system[SYNTAX],
1031
rule_system[SYNTAX]->output_filter, analyse_all);
1035
/* end of file ==============================================================*/