1
/* Copyright (C) 1995 Bjoern Beutel. */
3
/* Description. =============================================================*/
5
/* This module implements a static trie structure */
7
/* Includes. ================================================================*/
18
/* Macros. ==================================================================*/
20
#define ALIGN(address) (((ptr_t) (address) + (ptr_t) 3) & ~ (ptr_t) 3)
21
/* Align ADDRESS to next 32-bit int. */
23
#define INTS(bytes) (((bytes) + (ptr_t ) 3) >> 2)
24
/* Number of ints needed to store BYTES. */
26
/* Types. ===================================================================*/
28
typedef struct /* A node in a list of keys. */
31
char_t key; /* Key for this node. */
32
int_t subnode; /* Index of node for KEY in trie. */
33
int_t content; /* Content associated with KEY. */
36
typedef struct /* A dynamic trie node. */
38
string_t prefix; /* The prefix of this node. */
39
int_t prefix_len; /* The length of PREFIX. */
40
list_t key_list; /* List of keys. */
43
/* The trie is a vector of int_t that contains compact trie nodes.
44
* A compact trie node is int_t-aligned and looks as follows:
45
* u_byte_t prefix_len;
46
* char_t prefix[ prefix_len ];
47
* u_byte_t subnode_count;
48
* u_byte_t content_count;
49
* char_t subnode_keys[ subnode_count ];
50
* char_t content_keys[ content_count ];
51
* 0..3 pad bytes of value 0;
52
* int_t subnodes[ subnode_count ];
53
* int_t contents[ content_count ]; */
55
typedef struct /* Pointers to the items of a compact trie node. */
57
u_byte_t *prefix_len; /* Number of chars that precede the keys. */
58
char_t *prefix; /* The chars that precede the keys. */
59
u_byte_t *subnode_count; /* The number of subnodes in this node. */
60
u_byte_t *content_count; /* The number of contents in this node. */
61
char_t *subnode_keys; /* The keys for the subnodes in this node. */
62
char_t *content_keys; /* The keys for the contents in this node. */
63
/* 0..3 pad bytes of value 0; */
64
int_t *subnodes; /* Indexes of the subnodes. */
65
int_t *contents; /* Contents. */
68
/*---------------------------------------------------------------------------*/
71
store_trie_node( pool_t trie_pool, trie_node_t *node )
72
/* Store NODE in TRIE_POOL and return its index. */
74
int_t node_size; /* Size of the trie node in TRIE_POOL. */
75
int_t node_index; /* Index of the trie node in TRIE_POOL. */
76
int_t i, j, subnode_count, content_count;
79
compact_node_t n; /* Pointers to the items of the compactified NODE. */
81
/* Count the number of entries in NODE->KEY_LIST. */
82
subnode_count = content_count = 0;
83
FOREACH( key_node, node->key_list )
85
if (key_node->subnode != -1)
87
if (key_node->content != -1)
91
/* Get pool space for the new node. */
92
node_size = (INTS( sizeof( u_byte_t ) + sizeof( char_t ) * node->prefix_len
93
+ sizeof( u_byte_t ) + sizeof( char_t ) * subnode_count
94
+ sizeof( u_byte_t ) + sizeof( char_t ) * content_count )
95
+ subnode_count + content_count);
96
compact_node = get_pool_space( trie_pool, node_size, &node_index );
98
/* Set up the pointers to the items of the node. */
99
n.prefix_len = (u_byte_t *) compact_node;
100
n.prefix = (char_t *) (n.prefix_len + 1);
101
n.subnode_count = (u_byte_t *) (n.prefix + node->prefix_len);
102
n.content_count = (u_byte_t *) (n.subnode_count + 1);
103
n.subnode_keys = (char_t *) (n.content_count + 1);
104
n.content_keys = (char_t *) (n.subnode_keys + subnode_count);
105
n.subnodes = (int_t *) ALIGN( n.content_keys + content_count );
106
n.contents = (int_t *) ALIGN( n.subnodes + subnode_count );
108
/* Copy the items. */
109
*n.prefix_len = node->prefix_len;
110
for (i = 0; i < node->prefix_len; i++)
111
n.prefix[i] = TO_LOWER( node->prefix[i] );
112
*n.subnode_count = subnode_count;
113
*n.content_count = content_count;
115
FOREACH( key_node, node->key_list )
117
if (key_node->subnode != -1)
119
n.subnode_keys[i] = key_node->key;
120
n.subnodes[i] = key_node->subnode;
123
if (key_node->content != -1)
125
n.content_keys[j] = key_node->key;
126
n.contents[j] = key_node->content;
134
/*---------------------------------------------------------------------------*/
137
new_subtrie( pool_t trie_pool, trie_entry_t entries[],
138
int_t start, int_t end, int_t index )
139
/* Create a subtrie for ENTRIES[ START ]..ENTRIES[ END - 1 ],
140
* store it in TRIE_POOL and return the index of its root node.
141
* The first INDEX chars in the keys will be ignored, so they
142
* must match for the keys in ENTRIES[ START ]..ENTRIES[ END - 1 ]. */
145
int_t key_idx; /* The key index. */
147
int_t sub_start, sub_end; /* Start and end of a subtrie. */
148
string_t first_key, last_key; /* Used as abbreviations. */
149
key_node_t *key_node;
154
/* Look how many chars from INDEX onward are equal in the first and
155
* the last entry. This will be the prefix of the new node.
156
* If the key of the first entry is as long as the prefix, shorten the prefix
158
first_key = entries[ start ].key;
159
last_key = entries[ end - 1 ].key;
160
for (key_idx = index; first_key[ key_idx + 1 ] != EOS; key_idx++)
162
if (TO_LOWER( first_key[ key_idx ] ) != TO_LOWER( last_key[ key_idx ] ))
165
node.prefix = first_key + index;
166
node.prefix_len = key_idx - index;
168
/* Scan list of entries for the first char behind the prefix. For each key,
169
* find the corresponding lower and upper index in the list of entries. */
170
clear_list( &node.key_list );
171
for (sub_start = start; sub_start < end; sub_start = sub_end)
173
/* Create a new node for the current key. */
174
key_node = new_node( &node.key_list, sizeof( key_node_t ), LIST_END );
175
key_node->key = TO_LOWER( entries[ sub_start ].key[ key_idx ] );
176
if (entries[ sub_start ].key[ key_idx + 1 ] == EOS)
178
key_node->content = entries[ sub_start ].content;
182
key_node->content = -1;
184
/* Find last entry with same key. */
185
for (sub_end = sub_start; sub_end < end; sub_end++)
187
if (TO_LOWER( entries[ sub_end ].key[ key_idx ] ) != key_node->key)
190
key_node->subnode = new_subtrie( trie_pool, entries, sub_start, sub_end,
194
/* Store compact node and clear the key list. */
195
node_index = store_trie_node( trie_pool, &node );
196
while (node.key_list.first != NULL)
197
free_first_node( &node.key_list );
202
/*---------------------------------------------------------------------------*/
205
new_trie( int_t n, trie_entry_t *entries,
206
pool_t *trie_pool, int_t *root_node_index )
207
/* Take N ENTRIES and build a trie of them.
208
* Return the trie in *TRIE_POOL
209
* and the index of its root node in *ROOT_NODE_INDEX.
210
* The keys in *ENTRIES must be non-empty, unique, and sorted. */
212
*trie_pool = new_pool( sizeof( int_t ) );
213
*root_node_index = new_subtrie( *trie_pool, entries, 0, n, 0 );
216
/*---------------------------------------------------------------------------*/
219
lookup_trie( int_t *trie, int_t *node_index, string_t *input, int_t *content )
220
/* Test if a prefix of *INPUT matches the node at *NODE_INDEX in TRIE.
221
* If it does, return TRUE (else return FALSE) and:
222
* *CONTENT contains the associated content,
223
* *NODE contains the subnode for the matched input, and
224
* *INPUT points to the first char behind the prefix. */
226
int_t lower, upper, middle;
227
compact_node_t r; /* Pointers to the items of the root node; */
230
while (*node_index != -1)
232
/* Test if node's prefix matches the given key. */
233
r.prefix_len = (u_byte_t *) (trie + *node_index);
234
r.prefix = (char_t *) (r.prefix_len + 1);
235
if (strncmp_no_case( *input, r.prefix, *r.prefix_len ) != 0)
237
(*input) += *r.prefix_len;
239
/* Get the rest of the node. */
240
r.subnode_count = (u_byte_t *) (r.prefix + *r.prefix_len);
241
r.content_count = (u_byte_t *) (r.subnode_count + 1);
242
r.subnode_keys = (char_t *) (r.content_count + 1);
243
r.content_keys = (char_t *) (r.subnode_keys + *r.subnode_count);
244
r.subnodes = (int_t *) ALIGN( r.content_keys + *r.content_count );
245
r.contents = (int_t *) (r.subnodes + *r.subnode_count);
247
/* Perform binary search for subnode with given key. */
248
c = TO_LOWER( **input );
251
upper = *r.subnode_count - 1;
252
while (lower <= upper)
254
middle = (lower + upper) / 2;
255
if (ORD(c) < ORD( r.subnode_keys[ middle ] ))
257
else if (ORD(c) > ORD( r.subnode_keys[ middle ] ))
259
else /* This entry matches. */
261
*node_index = r.subnodes[ middle ];
266
/* Perform binary search for content with given key. */
269
upper = *r.content_count - 1;
270
while (lower <= upper)
272
middle = (lower + upper) / 2;
273
if (ORD(c) < ORD( r.content_keys[ middle ] ))
275
else if (ORD(c) > ORD( r.content_keys[ middle ] ))
277
else /* This entry matches. */
279
*content = r.contents[ middle ];
291
/* End of file. =============================================================*/