1
/* ====================================================================
2
* Copyright (c) 1996-2000 Carnegie Mellon University. All rights
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in
14
* the documentation and/or other materials provided with the
17
* This work was supported in part by funding from the Defense Advanced
18
* Research Projects Agency and the National Science Foundation of the
19
* United States of America, and the CMU Sphinx Speech Consortium.
21
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
22
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
25
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
* ====================================================================
36
/*********************************************************************
44
*********************************************************************/
48
#include <sphinxbase/ckd_alloc.h>
56
s3parse_quest(pset_t *pset, uint32 n_pset, quest_t *q, char *in_str)
63
/* skip leading whitespace */
64
for (; *s != '\0' && isspace((unsigned char)*s); s++);
66
if (*s == '\0') /* Nothing to parse */
73
E_ERROR("question syntax error");
83
E_ERROR("Expected space after question name\n");
89
for (i = 0; i < n_pset; i++) {
90
if (strcmp(s, pset[i].name) == 0) {
92
q->member = pset[i].member;
93
q->posn = pset[i].posn;
99
E_ERROR("Unknown question %s\n", s);
106
*sp = ' '; /* undo set to null */
108
/* skip whitespace */
109
for (; *s != '\0' && isspace((unsigned char)*s); s++);
117
else if (s[0] == '0') {
121
else if (s[0] == '1') {
126
/* skip trailing whitespace, if any */
127
for (; *s != '\0' && isspace((unsigned char)*s); s++);
133
count_quest_in_conj(pset_t *pset,
146
for (; *t != '\0' && isspace((unsigned char)*t); t++);
148
E_ERROR("Empty conjunction\n");
152
while (t && *t != ')' && *t != '\0') {
153
t = s3parse_quest(pset, n_pset, q, t);
156
for (; t && *t != '\0' && isspace((unsigned char)*t); t++);
159
E_ERROR("Error while parsing conjunction: %s\n", in_str);
164
E_ERROR("Error while parsing conjunction: %s\n", in_str);
173
s3parse_conj(pset_t *pset,
186
if (*s == '\0') return s;
188
/* skip leading whitespace */
189
for (; *s != '\0' && isspace((unsigned char)*s); s++);
191
if (*s == '\0') return s;
197
E_ERROR("Expected '(' before conjunction\n");
202
for (; *s != '\0' && isspace((unsigned char)*s); s++);
205
/* report the offending input; the argument was previously passed without a matching conversion */
E_ERROR("No terms and close paren in conjunction: %s\n", in_str);
210
n_quest = count_quest_in_conj(pset, n_pset, s);
211
*n_simple_q = n_quest;
213
termlst = (quest_t *)ckd_calloc(n_quest, sizeof(quest_t));
216
for (i = 0; i < n_quest; i++) {
217
s = s3parse_quest(pset, n_pset, &termlst[i], s);
218
for (; *s != '\0' && isspace((unsigned char)*s); s++);
229
s3cnt_q_term(char *in_str)
236
/* skip any leading whitespace */
237
for (; *s != '\0' && isspace((unsigned char)*s); s++);
239
/* assume everything is well-formed for the moment.
240
* later processing will catch syntax errors
241
* which should be unlikely anyway since this stuff
242
* is most likely machine generated */
244
for (s++, n_term = 0; *s && (s = strchr(s, '(')); n_term++, s++);
250
s3parse_comp_quest(pset_t *pset,
260
for (; *s != '\0' && isspace((unsigned char)*s); s++);
263
E_ERROR("Empty string seen for composite question\n");
269
E_ERROR("Composite question does not begin with '(' : %s\n",
275
q->sum_len = s3cnt_q_term(in_str);
276
q->conj_q = (quest_t **)ckd_calloc(q->sum_len, sizeof(quest_t *));
277
q->prod_len = (uint32 *)ckd_calloc(q->sum_len, sizeof(uint32));
279
++s; /* skip the open paren */
283
s = s3parse_conj(pset,
289
} while (s && *s && *s == '(');
292
E_ERROR("Error while parsing %s\n", in_str);
301
parse_simple_q(quest_t *q,
309
assert(q_str != NULL);
313
/* skip leading whitespace */
314
for (i = 0; i < len && isspace((unsigned char)q_str[i]); i++);
319
if (q_str[i] == '~') {
327
pset = atoi(&q_str[i]);
333
else if (pset < 400) {
339
/* HACK to get around WDBNDRY question context */
345
parse_conj(quest_t **term,
356
/* copy the next product into t_str */
357
eot = strchr(q_str, '|');
360
strncpy(t_str, q_str, (eot - q_str));
361
t_str[(eot - q_str)] = '\0';
364
strcpy(t_str, q_str);
367
/* count the # of terms in the product */
371
t = strchr(t+1, '&');
377
/* allocate a simple question for each term in product */
378
out = ckd_calloc(n_q, sizeof(quest_t));
383
/* parse each simple question */
384
simp_q_str = strtok(t_str, "&");
387
parse_simple_q(&out[i], simp_q_str);
388
simp_q_str = strtok(NULL, "&");
390
} while (simp_q_str);
396
cnt_q_term(char *q_str)
405
t = strchr(t+1, '|');
413
parse_compound_q(comp_quest_t *q,
419
q->sum_len = cnt_q_term(q_str);
420
q->conj_q = ckd_calloc(q->sum_len, sizeof(quest_t *));
421
q->prod_len = ckd_calloc(q->sum_len, sizeof(uint32));
426
rem_q_str = parse_conj(&q->conj_q[i],
434
print_quest(FILE *fp,
439
fprintf(fp, "%s%d %d",
445
fprintf(fp, "%s%s %d",
453
eval_quest(quest_t *q,
463
ret = q->member[feat[ctxt]];
465
ret = q->posn[feat[n_feat-1]];
467
E_FATAL("Ill-formed question\n");
470
if (q->neg) ret = !ret;
473
E_INFO("eval: (%s%u %d) %u -> %u\n",
477
(q->member ? q->member[feat[ctxt]] :
478
q->posn[feat[n_feat-1]]),
486
eval_comp_quest(comp_quest_t *q,
492
for (i = 0; i < q->sum_len; i++) {
493
for (j = 0; j < q->prod_len[i]; j++) {
494
if (!eval_quest(&q->conj_q[i][j], feat, n_feat))
498
/* One of the terms in the disjunction
499
* is satisfied; so the whole is satisfied */
500
if (j == q->prod_len[i])
504
/* visited all terms in the disjunction and none
505
* were satisfied; so neither is the disjunction */
510
print_comp_quest(FILE *fp,
517
for (i = 0; i < q->sum_len; i++) {
519
print_quest(fp, pset, &q->conj_q[i][0]);
521
for (j = 1; j < q->prod_len[i]; j++) {
523
print_quest(fp, pset, &q->conj_q[i][j]);
531
is_subset(quest_t *a,
538
if (a->member && b->member) {
539
if (a->ctxt != b->ctxt)
542
for (p = 0; p < n_phone; p++) {
553
if (f_a && (f_a != f_b)) {
562
else if ((a->member && b->posn) ||
563
(a->posn && b->member)) {
564
/* one question about word boundary
565
* and the other is about phone context
569
else if (a->posn && b->posn) {
570
/* Not handled at the moment */
578
simplify_conj(quest_t *conj,
583
int *del, exist_del = FALSE;
587
if (n_term == 1) /* Only one term; nothing to do */
590
del = ckd_calloc(n_term, sizeof(int));
592
/* Search for all pairs (i,j) where
593
* term_i is a subset of term_j. Mark
594
* all such term_j's for deletion since
595
* term_i && term_j == term_i */
596
for (i = 0; i < n_term; i++) {
597
for (j = 0; j < n_term; j++) {
598
if ((i != j) && (!del[i] || !del[j])) {
599
if (is_subset(&conj[i], &conj[j], n_phone)) {
600
/* mark the superset for deletion */
608
/* compact the conjunction by removing
609
* term_j's that are marked for deletion.
611
for (i = 0, j = 0; j < n_term; i++, j++) {
613
/* move j to the next
614
* non-deleted term (if any) */
615
for (j++; del[j] && (j < n_term); j++);
627
return i; /* return new n_term */
631
simplify_comp_quest(comp_quest_t *q,
638
for (i = 0; i < q->sum_len; i++) {
639
prod_len = simplify_conj(q->conj_q[i], q->prod_len[i], n_phone);
640
if (prod_len < q->prod_len[i]) {
641
assert(!(prod_len > q->prod_len[i]));
643
q->prod_len[i] = prod_len;
649
/* TRUE if there is at least one term in the composite
650
* question that was simplified */