1
/************************************************************************/
3
/* Centre for Speech Technology Research */
4
/* University of Edinburgh, UK */
5
/* Copyright (c) 2002 */
6
/* All Rights Reserved. */
8
/* Permission is hereby granted, free of charge, to use and distribute */
9
/* this software and its documentation without restriction, including */
10
/* without limitation the rights to use, copy, modify, merge, publish, */
11
/* distribute, sublicense, and/or sell copies of this work, and to */
12
/* permit persons to whom this work is furnished to do so, subject to */
13
/* the following conditions: */
14
/* 1. The code must retain the above copyright notice, this list of */
15
/* conditions and the following disclaimer. */
16
/* 2. Any modifications must be clearly marked as such. */
17
/* 3. Original authors' names are not deleted. */
18
/* 4. The authors' names are not used to endorse or promote products */
19
/* derived from this software without specific prior written */
22
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
32
/*************************************************************************/
34
/* Author: Rob Clark (robert@cstr.ed.ac.uk) */
35
/* -------------------------------------------------------------------- */
36
/* Code to read APML format XML as utterances. */
38
/*************************************************************************/
42
#include "EST_THash.h"
43
#include "EST_error.h"
45
#include "rxp/XML_Parser.h"
47
// ID-reference patterns. Neither simpleIDRegex nor rangeIDRegex is referenced
// in the visible part of this file.
// NOTE(review): presumably "\\(" ... "\\)" delimits a capture group and the
// bare parentheses are literal characters -- confirm against EST_Regex.
static EST_Regex simpleIDRegex(".*#id(w\\([0-9]+\\))");
48
static EST_Regex rangeIDRegex(".*#id(w\\([0-9]+\\)).*id(w\\([0-9]+\\))");
49
// One or more characters from the punctuation set . , ? !
// pcdata() uses this to detect punctuation-only tokens and to pull
// punctuation out of a token.
static EST_Regex RXpunc("[\\.,\\?\\!]+");
60
// Relation pointers. document_open() assigns them from create_relation() and
// the element/pcdata callbacks reach them as state->semstruct,
// state->emphasis and state->boundary.
// NOTE(review): presumably members of Parse_State; the enclosing declaration
// is not visible here -- confirm.
EST_Relation *semstruct;
61
EST_Relation *emphasis;
62
EST_Relation *boundary;
68
// Parser callback class for APML input, derived from XML_Parser_Class.
// Every virtual declared below is defined later in this file as
// Apml_Parser_Class::<name>; the notes summarise what those definitions
// visibly do.
class Apml_Parser_Class : public XML_Parser_Class
71
// Resets state->last_word and creates the utterance relations.
virtual void document_open(XML_Parser_Class &c,
74
// Marks its arguments unused; no other work is visible.
virtual void document_close(XML_Parser_Class &c,
78
// Dispatches on the element name and attaches a new item for known tags.
virtual void element_open(XML_Parser_Class &c,
82
XML_Attribute_List &attributes);
83
// Calls element_open() followed by element_close() with the same arguments.
virtual void element(XML_Parser_Class &c,
87
XML_Attribute_List &attributes);
88
// Updates state->pending / state->parent for closing tags.
virtual void element_close(XML_Parser_Class &c,
93
// Splits character data on whitespace and records words and punctuation.
virtual void pcdata(XML_Parser_Class &c,
97
// Marks its arguments unused; no other work is visible.
virtual void cdata(XML_Parser_Class &c,
102
// Prints the processing instruction and state->depth with printf.
virtual void processing(XML_Parser_Class &c,
105
const char *instruction);
106
// Reports get_error(p) through EST_error.
virtual void error(XML_Parser_Class &c,
111
// Helper that walks an attribute list and hands each entry's key (them->k)
// and value (them->v), cast to C strings, to a call whose opening line is not
// visible here (presumably a printf -- confirm).
// In the visible code it is only referenced by the (void) cast in apml_read().
static void print_attributes(XML_Attribute_List &attributes)
113
XML_Attribute_List::Entries them;
115
// Iterate over every entry; `them` tests false once the list is exhausted.
for(them.begin(attributes); them ; them++)
117
(const char *)them->k,
118
(const char *)them->v);
121
// Reads APML XML from an open FILE through an Apml_Parser_Class parser.
//   file - input stream handed to make_parser()
//   name - name forwarded to make_parser() alongside the stream
// Returns an EST_read_status. The only return visible here yields
// read_format_error; its guarding condition is not visible.
EST_read_status apml_read(FILE *file,
122
const EST_String &name,
127
(void)print_attributes; // just to shut -Wall up.
128
Apml_Parser_Class pclass;
135
// &state is passed as the parser's user data; the callbacks cast their
// `data` argument back to Parse_State *. Its declaration is not visible here.
XML_Parser *parser = pclass.make_parser(file, name, &state);
136
parser->track_context(TRUE);
139
return read_format_error;
150
/** Now we define the callbacks.
153
// Prepares the parse state: clears last_word and creates the relations
// "Perfomative", "Communicative", "Word", "SemStructure", "Emphasis" and
// "Boundary" on state->utt, storing each pointer in the state.
void Apml_Parser_Class::document_open(XML_Parser_Class &c,
158
// `data` is the user pointer supplied to make_parser() in apml_read().
Parse_State *state = (Parse_State *)data;
165
// No word has been seen yet; pcdata() sets this when it creates a word.
state->last_word=NULL;
168
// NOTE(review): the relation name is spelled "Perfomative" (no second 'r').
// It is a runtime key, so any consumer looking the relation up by name
// depends on this spelling -- confirm before correcting it.
state->perf = state->utt->create_relation("Perfomative");
169
state->com = state->utt->create_relation("Communicative");
170
state->words = state->utt->create_relation("Word");
171
state->semstruct = state->utt->create_relation("SemStructure");
172
state->emphasis = state->utt->create_relation("Emphasis");
173
state->boundary = state->utt->create_relation("Boundary");
178
// Document-close callback. The visible body only casts c, p and data to void
// to mark them as intentionally unused.
void Apml_Parser_Class::document_close(XML_Parser_Class &c,
182
(void)c; (void)p; (void)data;
186
// Opening-tag callback. Dispatches on the element name:
//   "turnallocation", "apml" - have their own branches; the bodies are not
//                              visible here
//   "performative", "rheme", "theme", "emphasis", "boundary" - build an
//                              EST_Item_Content named after the tag and
//                              attach it to a newly appended item
// The EST_warning at the end reports an unknown element name.
void Apml_Parser_Class::element_open(XML_Parser_Class &c,
190
XML_Attribute_List &attributes)
192
// NOTE(review): `attributes` is cast to void here but is iterated further
// down, so the cast is misleading for that parameter.
(void)c; (void)p; (void)attributes;
193
Parse_State *state = (Parse_State *)data;
195
//cout << " In element_open: " << name << "\n";
197
if (strcmp(name, "turnallocation")==0)
203
if (strcmp(name, "apml")==0)
208
if( strcmp(name, "performative")==0
209
|| strcmp(name, "rheme")==0
210
|| strcmp(name, "theme")==0
211
|| strcmp(name, "emphasis")==0
212
|| strcmp(name, "boundary")==0)
215
// create new item content
216
EST_Item_Content *cont = new EST_Item_Content();
217
cont->set_name(name);
219
// Visit each attribute as a key/value pair of EST_Strings; what is done
// with k and v is not visible here.
XML_Attribute_List::Entries them;
220
for(them.begin(attributes); them ; them++)
222
EST_String k = them->k;
223
EST_String v = them->v;
229
// Emphasis items go on the Emphasis relation and become state->pending;
// pcdata() appends later words as daughters of pending.
if( strcmp(name, "emphasis")==0 )
231
item = state->emphasis->append();
232
state->pending = item;
234
// Boundary items go on the Boundary relation and take the last word as a
// daughter.
// NOTE(review): last_word is NULL from document_open() until pcdata() has
// created a word; any guard for that is not visible here -- confirm.
else if(strcmp(name, "boundary")==0 )
236
item = state->boundary->append();
238
item->append_daughter(state->last_word);
242
// Otherwise: a top-level item is appended to SemStructure, a nested one
// becomes a daughter of the current parent.
if (state->parent == NULL)
243
item = state->semstruct->append();
245
item = state->parent->append_daughter();
249
item->set_contents(cont);
254
// NOTE(review): the message says "SOLE XML Parser" although this class is the
// APML parser -- presumably carried over from another reader. It is a runtime
// string, so it is left unchanged here.
EST_warning("SOLE XML Parser: unknown element %s", name);
258
// Single-call element callback: forwards to element_open() and then
// element_close() with the same arguments.
// NOTE(review): presumably used for elements with no separate closing tag --
// confirm against XML_Parser_Class.
void Apml_Parser_Class::element(XML_Parser_Class &c,
262
XML_Attribute_List &attributes)
264
// NOTE(review): these casts are redundant; all three names are used in the
// calls below.
(void)c; (void)p; (void)attributes;
266
element_open(c, p, data, name, attributes);
267
element_close(c, p, data, name);
271
// Closing-tag callback. Tests the element name against two groups:
//   "emphasis", "boundary"           - the branch body is not visible here
//   "performative", "theme", "rheme" - as far as visible, clears
//                                      state->pending and moves state->parent
//                                      to parent->up()
void Apml_Parser_Class::element_close(XML_Parser_Class &c,
276
// NOTE(review): `name` is cast to void although it is used by the strcmp
// calls below.
(void)c; (void)p; (void)name;
277
Parse_State *state = (Parse_State *)data;
279
if ( strcmp(name, "emphasis")==0
280
|| strcmp(name, "boundary")==0 )
287
if (strcmp(name, "performative")==0
288
|| strcmp(name, "theme")==0
289
|| strcmp(name, "rheme")==0)
292
// state->pending = state->parent;
293
state->pending = NULL;
294
// The second ';' is an empty statement and has no effect.
// NOTE(review): state->parent is dereferenced with no NULL check visible
// here -- confirm it cannot be NULL when one of these tags closes.
state->parent=state->parent->up();;
299
// Character-data callback. Splits `chars` on RXwhite into at most 255
// strings, then handles each non-empty token:
//   - a token that is entirely punctuation (matches RXpunc) is stored as the
//     "punc" feature of state->last_word
//   - any other token becomes a new item (under state->parent, or on
//     SemStructure when there is no parent), is appended to the Word
//     relation, and becomes the new state->last_word
void Apml_Parser_Class::pcdata(XML_Parser_Class &c,
306
Parse_State *state = (Parse_State *)data;
307
EST_String strings[255];
309
split(chars,strings,255,RXwhite);
311
// for(int cc=0 ; cc < 20 ; ++cc)
312
// cout << cc << ": \"" << strings[cc] << "\" (" << strings[cc].length() << ")\n";
316
// The `s < 1` term makes the loop examine slot 0 even when it is empty; after
// that the loop continues while the current slot is non-empty.
// NOTE(review): no bound on s against the 255-element array is visible. If
// split() fills every slot, strings[s] would be read past the end -- confirm
// how s is advanced and what split() does when there are more tokens.
while( s < 1 || strings[s].length() > 0 )
318
if(strings[s].length() > 0 )
320
if(strings[s].matches(RXpunc))
322
// NOTE(review): last_word is NULL until the first word item is created, so a
// leading punctuation-only token would dereference NULL unless a guard exists
// in lines not visible here -- confirm.
state->last_word->set("punc",strings[s]);
326
EST_Item_Content *cont = new EST_Item_Content();
329
if (state->parent == NULL)
330
item = state->semstruct->append();
332
item = state->parent->append_daughter();
333
item->set_contents(cont);
336
// NOTE(review): presumably at() yields the part of the token matching RXpunc
// and before() the text preceding that match -- confirm against EST_String.
EST_String ps = strings[s].at(RXpunc);
337
if( ps.length() > 0 )
339
//cout << "Got punc: " << ps << endl;
340
// Token contains punctuation: name the word by the text before it and keep
// the punctuation as the item's "punc" feature.
cont->set_name(strings[s].before(RXpunc));
341
item->set("punc",ps);
344
cont->set_name(strings[s]);
346
state->words->append(item);
347
state->last_word = item;
351
// pending is set by element_open() for "emphasis" and reset to NULL by
// element_close(); any NULL check around this call is not visible here.
state->pending->append_daughter(item);
354
// if (state->parent != NULL && p.context(0) == "w")
355
// state->parent->set(EST_String("word"), chars);
357
//cout << " got word: " << item->name() << "\n";
365
// CDATA callback. The visible body does no work: all four arguments are cast
// to void and the former debug output is commented out.
void Apml_Parser_Class::cdata(XML_Parser_Class &c,
370
(void)c; (void)p; (void)data; (void)chars;
371
// Parse_State *state = (Parse_State *)data;
373
// printf("SOLE XML Parser [cdata[%s]] %d\n", chars, state->depth);
377
// Processing-instruction callback. Prints the instruction text and the
// current state->depth to stdout with printf.
void Apml_Parser_Class::processing(XML_Parser_Class &c,
380
const char *instruction)
383
Parse_State *state = (Parse_State *)data;
385
// NOTE(review): the output is labelled "SOLE XML Parser" although this is the
// APML parser class -- presumably carried over from another reader.
printf("SOLE XML Parser [proc[%s]] %d\n", instruction, state->depth);
389
// Error callback. Reports the text returned by get_error(p) through
// EST_error.
void Apml_Parser_Class::error(XML_Parser_Class &c,
393
// NOTE(review): `p` is cast to void here but is used by get_error(p) below.
(void)c; (void)p; (void)data;
394
// Parse_State *state = (Parse_State *)data;
396
EST_error("SOLE XML Parser %s", get_error(p));