2
* Copyright © 2012 Canonical Ltd.
4
* This program is free software: you can redistribute it and/or modify it
5
* under the terms of the GNU General Public License version 3, as
6
* published by the Free Software Foundation.
8
* This program is distributed in the hope that it will be useful, but
9
* WITHOUT ANY WARRANTY; without even the implied warranties of
10
* MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR
11
* PURPOSE. See the GNU General Public License for more details.
13
* You should have received a copy of the GNU General Public License along
14
* with this program. If not, see <http://www.gnu.org/licenses/>.
17
#define G_LOG_DOMAIN "hudsphinx"
20
#include "hudsphinx.h"
21
#include "hud-query-iface.h"
22
#include "hudsource.h"
23
#include "pronounce-dict.h"
26
#include "pocketsphinx.h"
27
#include <sphinxbase/ad.h>
28
#include <sphinxbase/cont_ad.h>
30
#include <gio/gunixoutputstream.h>
31
#include <glib/gstdio.h>
34
hud_sphinx_error_quark(void)
36
static GQuark quark = 0;
38
quark = g_quark_from_static_string ("hud-sphinx-error-quark");
43
hud_sphinx_alphanumeric_regex_new (void)
45
GRegex *alphanumeric_regex = NULL;
48
alphanumeric_regex = g_regex_new("ā¦|\\.\\.\\.", 0, 0, &error);
49
if (alphanumeric_regex == NULL) {
50
g_error("Compiling regex failed: [%s]", error->message);
54
return alphanumeric_regex;
57
static arg_t sphinx_cmd_ln[] = {
59
{ "-adcdev", ARG_STRING, NULL, "Name of audio device to use for input." },
65
GObject parent_instance;
67
HudQueryIfaceComCanonicalHudQuery *skel;
68
GRegex * alphanumeric_regex;
73
typedef GObjectClass HudSphinxClass;
75
static void hud_sphinx_finalize (GObject *object);
77
static gboolean hud_sphinx_voice_query (HudVoice *self, HudSource *source,
78
gchar **result, GError **error);
80
static void hud_sphinx_iface_init (HudVoiceInterface *iface);
82
G_DEFINE_TYPE_WITH_CODE (HudSphinx, hud_sphinx, G_TYPE_OBJECT,
83
G_IMPLEMENT_INTERFACE (HUD_TYPE_VOICE, hud_sphinx_iface_init))
85
static void hud_sphinx_iface_init (HudVoiceInterface *iface)
87
iface->query = hud_sphinx_voice_query;
91
hud_sphinx_class_init (GObjectClass *klass)
93
klass->finalize = hud_sphinx_finalize;
97
hud_sphinx_init (HudSphinx *self)
99
self->alphanumeric_regex = hud_sphinx_alphanumeric_regex_new();
103
hud_sphinx_finalize (GObject *object)
105
HudSphinx *self = HUD_SPHINX (object);
107
g_clear_object(&self->skel);
108
g_clear_pointer(&self->alphanumeric_regex, g_regex_unref);
110
g_clear_pointer(&self->ps, ps_free);
112
G_OBJECT_CLASS (hud_sphinx_parent_class)
117
hud_sphinx_new (HudQueryIfaceComCanonicalHudQuery *skel, const gchar *device, GError **error)
119
HudSphinx *self = g_object_new (HUD_TYPE_SPHINX, NULL);
120
self->skel = g_object_ref(skel);
122
gchar *hmm = HMM_PATH;
123
gchar *dict = DICT_PATH;
125
if (device != NULL) {
126
self->config = cmd_ln_init(NULL, sphinx_cmd_ln, TRUE,
132
self->config = cmd_ln_init(NULL, sphinx_cmd_ln, TRUE,
138
if (self->config == NULL) {
139
g_warning("Sphinx command line arguments failed to initialize");
140
g_set_error_literal (error, hud_sphinx_error_quark (), 0,
141
"Sphinx command line arguments failed to initialize");
145
self->ps = ps_init(self->config);
146
if (self->ps == NULL) {
147
g_warning("Unable to initialize Sphinx decoder");
148
g_set_error_literal (error, hud_sphinx_error_quark (), 0,
149
"Unable to initialize Sphinx decoder");
156
/* Start code taken from PocketSphinx */
158
/* Sleep for specified msec */
162
#if (defined(WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
165
/* ------------------- Unix ------------------ */
169
tmo.tv_usec = ms * 1000;
171
select(0, NULL, NULL, NULL, &tmo);
176
hud_sphinx_utterance_loop(HudSphinx *self, gchar **result, GError **error)
185
cmd_ln_t *config = self->config;
186
ps_decoder_t *ps = self->ps;
188
if ((ad = ad_open_dev (cmd_ln_str_r (config, "-adcdev"),
189
(int) cmd_ln_float32_r(config, "-samprate"))) == NULL )
191
g_warning("Failed to open audio device");
193
g_set_error_literal (error, hud_sphinx_error_quark (), 0,
194
"Failed to open audio device");
198
/* Initialize continuous listening module */
199
if ((cont = cont_ad_init (ad, ad_read)) == NULL )
201
g_warning("Failed to initialize voice activity detection");
203
g_set_error_literal (error, hud_sphinx_error_quark (), 0,
204
"Failed to initialize voice activity detection");
207
if (ad_start_rec (ad) < 0)
209
g_warning("Failed to start recording");
211
g_set_error_literal (error, hud_sphinx_error_quark (), 0,
212
"Failed to start recording");
216
/* Indicate listening for next utterance */
217
g_debug("Voice query is listening");
218
hud_query_iface_com_canonical_hud_query_emit_voice_query_listening (
219
HUD_QUERY_IFACE_COM_CANONICAL_HUD_QUERY (self->skel));
221
/* Wait for data for the next utterance */
222
while ((k = cont_ad_read (cont, adbuf, 4096)) == 0)
227
g_warning("Failed to read audio");
229
g_set_error_literal (error, hud_sphinx_error_quark (), 0,
230
"Failed to read audio");
235
* Non-zero amount of data received; start recognition of new utterance.
236
* NULL argument to ps_start_utt => automatic generation of utterance-id.
238
if (ps_start_utt (ps, NULL ) < 0)
239
g_error("Failed to start utterance");
240
g_debug("Voice query has heard something");
241
hud_query_iface_com_canonical_hud_query_emit_voice_query_heard_something(
242
HUD_QUERY_IFACE_COM_CANONICAL_HUD_QUERY (self->skel));
243
ps_process_raw (ps, adbuf, k, FALSE, FALSE);
245
/* Note timestamp for this first block of data */
248
/* Decode utterance until end (marked by a "long" silence, >1sec) */
251
/* Read non-silence audio data, if any, from continuous listening module */
252
if ((k = cont_ad_read (cont, adbuf, 4096)) < 0)
253
g_error("Failed to read audio");
257
* No speech data available; check current timestamp with most recent
258
* speech to see if more than 1 sec elapsed. If so, end of utterance.
260
if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
265
/* New speech data received; note current timestamp */
270
* Decode whatever data was read above.
272
rem = ps_process_raw (ps, adbuf, k, FALSE, FALSE);
274
/* If no work to be done, sleep a bit */
275
if ((rem == 0) && (k == 0))
280
* Utterance ended; flush any accumulated, unprocessed A/D data and stop
281
* listening until current utterance completely decoded
284
while (ad_read (ad, adbuf, 4096) >= 0);
285
cont_ad_reset (cont);
287
g_debug("Voice query has stopped listening, processing...");
289
/* Finish decoding, obtain and print result */
291
hyp = ps_get_hyp (ps, NULL, &uttid);
294
cont_ad_close (cont);
299
*result = g_strdup (hyp);
309
/* End code taken from PocketSphinx */
311
/* Actually recognizing the Audio */
313
hud_sphinx_listen (HudSphinx *self, fsg_model_t* fsg,
314
gchar **result, GError **error)
316
// Get the fsg set or create one if none
317
fsg_set_t *fsgs = ps_get_fsgset(self->ps);
319
fsgs = ps_update_fsgset(self->ps);
321
// Remove the old fsg
322
fsg_model_t * old_fsg = fsg_set_get_fsg(fsgs, fsg_model_name(fsg));
325
fsg_set_remove(fsgs, old_fsg);
326
fsg_model_free(old_fsg);
330
fsg_set_add(fsgs, fsg_model_name(fsg), fsg);
331
fsg_set_select (fsgs, fsg_model_name(fsg));
333
ps_update_fsgset (self->ps);
335
gboolean success = hud_sphinx_utterance_loop (self, result, error);
338
g_debug("Recognized: %s", *result);
340
g_warning("Utterance loop failed");
347
free_func (gpointer data)
349
g_ptr_array_free((GPtrArray*) data, TRUE);
353
hud_sphinx_number_of_states(GPtrArray *command_list)
355
gint number_of_states = 0;
358
for (i = 0; i < command_list->len; ++i)
360
GPtrArray *command = g_ptr_array_index(command_list, i);
361
number_of_states += command->len;
364
// the number of states calculated above doesn't include the start and end states
365
return number_of_states + 2;
369
hud_sphinx_write_command (fsg_model_t *fsg, GPtrArray *command,
370
gint state_num, gfloat command_probability)
372
// the first transition goes from the state 0
373
// its probability depends on how many commands there are
374
if (command->len > 0)
376
const gchar *word = g_ptr_array_index(command, 0);
377
gchar *lower = g_utf8_strdown(word, -1);
378
gint wid = fsg_model_word_add (fsg, lower);
379
fsg_model_trans_add (fsg, 0, ++state_num,
380
command_probability, wid);
384
// the rest of the transitions are certain (straight path)
385
// so have probability 1.0
387
for (i = 1; i < command->len; ++i)
389
const gchar *word = g_ptr_array_index(command, i);
390
gchar *lower = g_utf8_strdown(word, -1);
391
gint wid = fsg_model_word_add (fsg, lower);
392
fsg_model_trans_add (fsg, state_num, state_num + 1,
398
// null transition to exit state
399
fsg_model_null_trans_add (fsg, state_num, 1, 0);
405
hud_sphinx_build_grammar (HudSphinx *self, GList *items,
406
fsg_model_t **fsg, GError **error)
408
PronounceDict *dict = pronounce_dict_get_sphinx(error);
414
/* Get the pronunciations for the items */
415
GHashTable *pronounciations = g_hash_table_new_full (g_str_hash, g_str_equal,
416
g_free, (GDestroyNotify) g_strfreev);
417
GPtrArray *command_list = g_ptr_array_new_with_free_func (free_func);
418
GHashTable *unique_commands = g_hash_table_new(g_str_hash, g_str_equal);
419
HudItemPronunciationData pronounciation_data =
420
{ pronounciations, self->alphanumeric_regex, command_list, dict, unique_commands };
421
g_list_foreach (items, (GFunc) hud_item_insert_pronounciation,
422
&pronounciation_data);
423
g_hash_table_destroy(unique_commands);
425
if (command_list->len == 0)
427
g_set_error_literal (error, hud_sphinx_error_quark(), 0, "Could not build Sphinx grammar. Is sphinx-voxforge installed?");
428
g_clear_pointer(&pronounciations, g_hash_table_destroy);
429
g_ptr_array_free (command_list, TRUE);
433
gint number_of_states = hud_sphinx_number_of_states(command_list);
434
gfloat command_probability = 1.0f / command_list->len;
436
g_debug("Number of states [%d]", number_of_states);
438
*fsg = fsg_model_init ("<hud.GRAM>", ps_get_logmath (self->ps),
439
cmd_ln_float32_r(self->config, "-lw"), number_of_states);
440
(*fsg)->start_state = 0;
441
(*fsg)->final_state = 1;
443
// starting at state 2 (0 is start and 1 is exit)
446
for (i = 0; i < command_list->len; ++i)
448
GPtrArray *command = g_ptr_array_index(command_list, i);
449
// keep a record of the number of states so far
450
state_num = hud_sphinx_write_command(*fsg, command, state_num, command_probability);
453
glist_t nulls = fsg_model_null_trans_closure (*fsg, NULL );
456
g_clear_pointer(&pronounciations, g_hash_table_destroy);
457
g_ptr_array_free (command_list, TRUE);
463
hud_sphinx_voice_query (HudVoice *voice, HudSource *source, gchar **result, GError **error)
465
g_return_val_if_fail(HUD_IS_SPHINX(voice), FALSE);
466
HudSphinx *self = HUD_SPHINX(voice);
468
if (source == NULL) {
469
/* No active window, that's fine, but we'll just move on */
471
g_set_error_literal (error, hud_sphinx_error_quark(), 0, "Active source is null");
475
GList *items = hud_source_get_items(source);
477
/* The active window doesn't have items, that's cool. We'll move on. */
482
fsg_model_t *fsg = NULL;
483
if (!hud_sphinx_build_grammar(self, items, &fsg, error))
485
g_list_free_full(items, g_object_unref);
489
gboolean success = hud_sphinx_listen (self, fsg, result, error);
491
g_list_free_full(items, g_object_unref);