1
/*************************************************************************/
3
/* Centre for Speech Technology Research */
4
/* University of Edinburgh, UK */
5
/* Copyright (c) 1995,1996 */
6
/* All Rights Reserved. */
8
/* Permission is hereby granted, free of charge, to use and distribute */
9
/* this software and its documentation without restriction, including */
10
/* without limitation the rights to use, copy, modify, merge, publish, */
11
/* distribute, sublicense, and/or sell copies of this work, and to */
12
/* permit persons to whom this work is furnished to do so, subject to */
13
/* the following conditions: */
14
/* 1. The code must retain the above copyright notice, this list of */
15
/* conditions and the following disclaimer. */
16
/* 2. Any modifications must be clearly marked as such. */
17
/* 3. Original authors' names are not deleted. */
18
/* 4. The authors' names are not used to endorse or promote products */
19
/* derived from this software without specific prior written */
22
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
32
/*************************************************************************/
33
/* Author : Simon King */
34
/* Date : June 1998 */
35
/*************************************************************************/
42
#include "ling_class/EST_Utterance.h"
44
typedef EST_TVector<EST_Item*> EST_Item_ptr_Vector;
45
#if defined(INSTANTIATE_TEMPLATES)
46
#include "../base_class/EST_TVector.cc"
48
template class EST_TVector<EST_Item*>;
52
static EST_Item *const def_val_item_ptr = NULL;
53
template <> EST_Item* const *EST_Item_ptr_Vector::def_val = &def_val_item_ptr;
55
static EST_Item* error_return_item_ptr = NULL;
56
template <> EST_Item* *EST_Item_ptr_Vector::error_return = &error_return_item_ptr;
59
float (*local_cost_function)(const EST_Item *item1,
60
const EST_Item *item2);
63
bool (*local_pruning_function)(const int i,
68
bool dp_sub(int i, int j,
69
const EST_Item_ptr_Vector &vr1,
70
const EST_Item_ptr_Vector &vr2,
71
EST_IMatrix &DP_path_i, EST_IMatrix &DP_path_j,
72
local_cost_function lcf,
73
local_pruning_function lpf,
77
void trace_back_and_link(int i, int j,
78
EST_Item *p1, EST_Item *p2,
79
const EST_IMatrix &DP_path_i,
80
const EST_IMatrix &DP_path_j,
83
inline bool null_lpf(const int,const int,const int,const int)
88
bool dp_match(const EST_Relation &lexical,
89
const EST_Relation &surface,
91
local_cost_function lcf,
92
local_pruning_function lpf,
96
bool dp_match(const EST_Relation &lexical,
97
const EST_Relation &surface,
99
local_cost_function lcf,
102
// dp without pruning
104
return dp_match(lexical,surface,match,lcf,null_lpf,null_sym);
108
static float fixed_ins;
109
static float fixed_del;
110
static float fixed_sub;
112
float fixed_local_cost(const EST_Item *s1, const EST_Item *s2)
114
EST_String null_sym = "nil";
116
// otherwise cost is either insertion cost, or cost_matrix value
117
if (s1->name() == s2->name())
121
if (s1->name() == null_sym)
123
else if (s2->name() == null_sym)
131
bool dp_match(const EST_Relation &lexical,
132
const EST_Relation &surface,
134
float ins, float del, float sub)
141
return dp_match(lexical, surface, match, fixed_local_cost,
142
null_lpf, &null_sym);
146
bool dp_match(const EST_Relation &lexical,
147
const EST_Relation &surface,
149
local_cost_function lcf,
150
local_pruning_function lpf,
155
// aligns lexical and surface forms using dynamic programming
156
// i.e. the lexical form is transformed into the surface form
157
// by substitutions, insertions and deletions
159
// makes links between associated (matching or substituted) items
160
// insertions and deletions are 'left dangling'
161
// links are stored in a new relation called "Match"
163
// assumes that local cost computation is cheap (no caching)
165
EST_IMatrix DP_path_i,DP_path_j;
166
EST_Item_ptr_Vector vr1,vr2;
170
l1 = lexical.length() + 1;
171
l2 = surface.length() + 1;
180
for (p=lexical.head(),i=1; p != 0; p = p->next(),i++)
182
for (p=surface.head(),i=1; p != 0; p = p->next(),i++)
185
DP_path_i.resize(l1,l2);
186
DP_path_j.resize(l1,l2);
189
cerr << "Pruning" << endl;
202
// end conditions : must start at (0,0) and finish at (l1-1,l2-1)
203
// i.e. extreme corners of grid
205
cost.resize(vr1.length(),vr2.length());
208
cost.a_no_check(i,j) = -1; // means not computed yet
210
if(!dp_sub(l1-1,l2-1,
213
lcf,lpf,null_sym,cost))
215
cerr << "No path found (over pruning ?)" << endl;
218
// make somewhere to record the relations
219
//utt.create_relation("Match");
220
for (p = lexical.head(); p; p = p->next())
227
cerr << i << "," << j << "=[" << DP_path_i(i,j) << "," << DP_path_j(i,j) << "] ";
233
trace_back_and_link(l1-1,l2-1,
236
DP_path_i,DP_path_j,null_sym);
242
bool dp_sub(int i, int j,
243
const EST_Item_ptr_Vector &vr1,
244
const EST_Item_ptr_Vector &vr2,
245
EST_IMatrix &DP_path_i, EST_IMatrix &DP_path_j,
246
local_cost_function lcf,
247
local_pruning_function lpf,
252
// the goal is to compute cost(i,j)
258
//cerr << "sub " << i << " " << j << endl;
260
int best_i=-1,best_j=-1;
262
float best_c=MAXFLOAT;
265
if(lpf(i,j,vr1.length()-1,vr2.length()-1))
269
// consider all paths into this point
270
// and select the best one (lowest cost)
279
best_c = lcf(null_sym,null_sym);
284
// insert j'th item from vr2
285
if(dp_sub(i,j-1,vr1,vr2,
287
lcf,lpf, null_sym,cost))
291
best_c = lcf(null_sym,vr2(j)) + cost.a(i,j-1);
301
// delete i'th item from vr1
302
if(dp_sub(i-1,j,vr1,vr2,
304
lcf,lpf, null_sym,cost))
308
best_c = lcf(vr1(i),null_sym) + cost.a(i-1,j);
313
// this is the simplest local constraint (i.e. no constraints !)
314
// which allows unlimited consecutive insertions or deletions
319
if(dp_sub(i-1,j-1,vr1,vr2,
321
lcf,lpf, null_sym,cost))
323
sub = 2 * lcf(vr1(i),vr2(j)) + cost(i-1,j-1);
332
if(dp_sub(i,j-1,vr1,vr2,
334
lcf,lpf, null_sym,cost))
336
ins=lcf(null_sym,vr2(j)) + cost(i,j-1);
345
if(dp_sub(i-1,j,vr1,vr2,
347
lcf,lpf, null_sym,cost))
349
del=lcf(vr1(i),null_sym) + cost(i-1,j);
359
cost.a(i,j) = best_c;
360
DP_path_i.a_no_check(i,j) = best_i;
361
DP_path_j.a_no_check(i,j) = best_j;
364
//cerr << "best " << i << "," << j << " = " << best_c << endl;
366
if(best_c == MAXFLOAT)
367
// didn't find a better path
375
void trace_back_and_link(int i, int j,
376
EST_Item *p1, EST_Item *p2,
377
const EST_IMatrix &DP_path_i,
378
const EST_IMatrix &DP_path_j,
382
//cerr << "trace " << i << " " << j << endl;
386
//i=utt.relation("Lexical")->index(p1);
387
//j=utt.relation("Surface")->index(p2);
393
if(DP_path_i(i,j) == i-1)
395
if(DP_path_j(i,j) == j-1)
397
// match, or substitution
398
//cerr << "sub " << p1->name() << " with " << p2->name() << endl;
399
p1->append_daughter(p2);
410
// p1->append_daughter(p2); // decorative
414
trace_back_and_link(DP_path_i(i,j), DP_path_j(i,j),