/* * Copyright © 2012 Canonical Ltd. * * This program is free software: you can redistribute it and/or modify it * under the terms of the GNU General Public License version 3, as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranties of * MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program. If not, see . */ #define G_LOG_DOMAIN "hudsphinx" #include "voice.h" #include "sphinx.h" #include "hud-query-iface.h" #include "source.h" #include "pronounce-dict.h" /* Pocket Sphinx */ #include "pocketsphinx.h" #include #include #include #include static GQuark hud_sphinx_error_quark(void) { static GQuark quark = 0; if (quark == 0) quark = g_quark_from_static_string ("hud-sphinx-error-quark"); return quark; } static GRegex * hud_sphinx_alphanumeric_regex_new (void) { GRegex *alphanumeric_regex = NULL; GError *error = NULL; alphanumeric_regex = g_regex_new("…|\\.\\.\\.", 0, 0, &error); if (alphanumeric_regex == NULL) { g_error("Compiling regex failed: [%s]", error->message); g_error_free(error); } return alphanumeric_regex; } static arg_t sphinx_cmd_ln[] = { POCKETSPHINX_OPTIONS, { "-adcdev", ARG_STRING, NULL, "Name of audio device to use for input." }, CMDLN_EMPTY_OPTION }; struct _HudSphinx { GObject parent_instance; HudQueryIfaceComCanonicalHudQuery *skel; GRegex * alphanumeric_regex; cmd_ln_t *config; ps_decoder_t *ps; }; typedef GObjectClass HudSphinxClass; static void hud_sphinx_finalize (GObject *object); static gboolean hud_sphinx_voice_query (HudVoice *self, HudSource *source, gchar **result, GError **error); static void hud_sphinx_iface_init (HudVoiceInterface *iface); G_DEFINE_TYPE_WITH_CODE (HudSphinx, hud_sphinx, G_TYPE_OBJECT, G_IMPLEMENT_INTERFACE (HUD_TYPE_VOICE, hud_sphinx_iface_init)) static void hud_sphinx_iface_init (HudVoiceInterface *iface) { iface->query = hud_sphinx_voice_query; } static void hud_sphinx_class_init (GObjectClass *klass) { klass->finalize = hud_sphinx_finalize; } static void hud_sphinx_init (HudSphinx *self) { self->alphanumeric_regex = hud_sphinx_alphanumeric_regex_new(); } static void hud_sphinx_finalize (GObject *object) { HudSphinx *self = HUD_SPHINX (object); g_clear_object(&self->skel); g_clear_pointer(&self->alphanumeric_regex, g_regex_unref); g_clear_pointer(&self->ps, ps_free); G_OBJECT_CLASS (hud_sphinx_parent_class) ->finalize (object); } HudSphinx * hud_sphinx_new (HudQueryIfaceComCanonicalHudQuery *skel, const gchar *device, GError **error) { HudSphinx *self = g_object_new (HUD_TYPE_SPHINX, NULL); self->skel = g_object_ref(skel); gchar *hmm = HMM_PATH; gchar *dict = DICT_PATH; if (device != NULL) { self->config = cmd_ln_init(NULL, sphinx_cmd_ln, TRUE, "-hmm", hmm, "-dict", dict, "-adcdev", device, NULL); } else { self->config = cmd_ln_init(NULL, sphinx_cmd_ln, TRUE, "-hmm", hmm, "-dict", dict, NULL); } if (self->config == NULL) { g_warning("Sphinx command line arguments failed to initialize"); g_set_error_literal (error, hud_sphinx_error_quark (), HUD_VOICE_INITIALISATION_ERROR, "Sphinx command line arguments failed to initialize"); return NULL; } self->ps = ps_init(self->config); if (self->ps == NULL) { g_warning("Unable to initialize Sphinx decoder"); g_set_error_literal (error, hud_sphinx_error_quark (), HUD_VOICE_INITIALISATION_ERROR, "Unable to initialize Sphinx decoder"); return NULL; } return self; } /* Start code taken from PocketSphinx */ /* Sleep for specified msec */ static void sleep_msec(int32 ms) { #if (defined(WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE) Sleep(ms); #else /* ------------------- Unix ------------------ */ struct timeval tmo; tmo.tv_sec = 0; tmo.tv_usec = ms * 1000; select(0, NULL, NULL, NULL, &tmo); #endif } static gboolean hud_sphinx_utterance_loop(HudSphinx *self, gchar **result, GError **error) { ad_rec_t *ad; int16 adbuf[4096]; int32 k, ts, rem; char const *hyp; char const *uttid; cont_ad_t *cont; cmd_ln_t *config = self->config; ps_decoder_t *ps = self->ps; if ((ad = ad_open_dev (cmd_ln_str_r (config, "-adcdev"), (int) cmd_ln_float32_r(config, "-samprate"))) == NULL ) { g_warning("Failed to open audio device"); *result = NULL; g_set_error_literal(error, hud_sphinx_error_quark(), HUD_VOICE_AUDIO_DEVICE_OPEN_ERROR, "Failed to open audio device"); return FALSE; } /* Initialize continuous listening module */ if ((cont = cont_ad_init (ad, ad_read)) == NULL ) { g_warning("Failed to initialize voice activity detection"); *result = NULL; g_set_error_literal(error, hud_sphinx_error_quark(), HUD_VOICE_AUDIO_DEVICE_OPEN_ERROR, "Failed to initialize voice activity detection"); return FALSE; } if (ad_start_rec (ad) < 0) { g_warning("Failed to start recording"); *result = NULL; g_set_error_literal(error, hud_sphinx_error_quark(), HUD_VOICE_READ_ERROR, "Failed to start recording"); return FALSE; } /* Indicate listening for next utterance */ g_debug("Voice query is listening"); hud_query_iface_com_canonical_hud_query_emit_voice_query_listening ( HUD_QUERY_IFACE_COM_CANONICAL_HUD_QUERY (self->skel)); int attempts = 0; /* Wait data for next utterance */ while ((k = cont_ad_read (cont, adbuf, 4096)) == 0) { ++attempts; if(attempts == 100) { break; } sleep_msec (100); } if (k == 0) { g_warning("Nothing was heard"); *result = NULL; g_set_error_literal (error, hud_sphinx_error_quark (), HUD_VOICE_NO_AUDIO_ERROR, "Nothing was heard"); return FALSE; } if (k < 0) { g_warning("Failed to read audio"); *result = NULL; g_set_error_literal (error, hud_sphinx_error_quark (), HUD_VOICE_READ_ERROR, "Failed to read audio"); return FALSE; } /* * Non-zero amount of data received; start recognition of new utterance. * NULL argument to uttproc_begin_utt => automatic generation of utterance-id. */ if (ps_start_utt (ps, NULL ) < 0) { g_warning("Failed to start utterance"); *result = NULL; g_set_error_literal (error, hud_sphinx_error_quark (), HUD_VOICE_READ_ERROR, "Failed to start utterance"); return FALSE; } g_debug("Voice query has heard something"); hud_query_iface_com_canonical_hud_query_emit_voice_query_heard_something( HUD_QUERY_IFACE_COM_CANONICAL_HUD_QUERY (self->skel)); ps_process_raw (ps, adbuf, k, FALSE, FALSE); /* Note timestamp for this first block of data */ ts = cont->read_ts; /* Decode utterance until end (marked by a "long" silence, >1sec) */ for (;;) { /* Read non-silence audio data, if any, from continuous listening module */ if ((k = cont_ad_read (cont, adbuf, 4096)) < 0) { g_warning("Failed to read audio"); *result = NULL; g_set_error_literal (error, hud_sphinx_error_quark (), HUD_VOICE_READ_ERROR, "Failed to read audio"); return FALSE; } if (k == 0) { /* * No speech data available; check current timestamp with most recent * speech to see if more than 1 sec elapsed. If so, end of utterance. */ if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC) break; } else { /* New speech data received; note current timestamp */ ts = cont->read_ts; /* Check for timeout */ if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC * 30) { g_warning("Nothing was heard"); *result = NULL; g_set_error_literal (error, hud_sphinx_error_quark (), HUD_VOICE_NO_AUDIO_ERROR, "Nothing was heard"); return FALSE; } } /* * Decode whatever data was read above. */ rem = ps_process_raw (ps, adbuf, k, FALSE, FALSE); /* If no work to be done, sleep a bit */ if ((rem == 0) && (k == 0)) sleep_msec (20); } /* * Utterance ended; flush any accumulated, unprocessed A/D data and stop * listening until current utterance completely decoded */ ad_stop_rec (ad); while (ad_read (ad, adbuf, 4096) >= 0); cont_ad_reset (cont); g_debug("Voice query has stopped listening, processing..."); fflush (stdout); /* Finish decoding, obtain and print result */ ps_end_utt (ps); hyp = ps_get_hyp (ps, NULL, &uttid); fflush (stdout); cont_ad_close (cont); ad_close (ad); if (hyp) { *result = g_strdup (hyp); } else { *result = NULL; } return TRUE; } /* End code taken from PocketSphinx */ /* Actually recognizing the Audio */ static gboolean hud_sphinx_listen (HudSphinx *self, fsg_model_t* fsg, gchar **result, GError **error) { // Get the fsg set or create one if none fsg_set_t *fsgs = ps_get_fsgset(self->ps); if (fsgs == NULL) fsgs = ps_update_fsgset(self->ps); // Remove the old fsg fsg_model_t * old_fsg = fsg_set_get_fsg(fsgs, fsg_model_name(fsg)); if (old_fsg) { fsg_set_remove(fsgs, old_fsg); fsg_model_free(old_fsg); } // Add the new fsg fsg_set_add(fsgs, fsg_model_name(fsg), fsg); fsg_set_select (fsgs, fsg_model_name(fsg)); ps_update_fsgset (self->ps); gboolean success = hud_sphinx_utterance_loop (self, result, error); if (success) { g_debug("Recognized: %s", *result); } else { g_warning("Utterance loop failed"); } return success; } static void free_func (gpointer data) { g_ptr_array_free((GPtrArray*) data, TRUE); } static gint hud_sphinx_number_of_states(GPtrArray *command_list) { gint number_of_states = 0; guint i; for (i = 0; i < command_list->len; ++i) { GPtrArray *command = g_ptr_array_index(command_list, i); number_of_states += command->len; } // the number of states calculated above doesn't include the start and end return number_of_states + 2; } static gint hud_sphinx_write_command (fsg_model_t *fsg, GPtrArray *command, gint state_num, gfloat command_probability) { // the first transition goes from the state 0 // it's probability depends on how many commands there are if (command->len > 0) { const gchar *word = g_ptr_array_index(command, 0); gchar *lower = g_utf8_strdown(word, -1); gint wid = fsg_model_word_add (fsg, lower); fsg_model_trans_add (fsg, 0, ++state_num, command_probability, wid); g_free(lower); } // the rest of the transitions are certain (straight path) // so have probability 1.0 guint i; for (i = 1; i < command->len; ++i) { const gchar *word = g_ptr_array_index(command, i); gchar *lower = g_utf8_strdown(word, -1); gint wid = fsg_model_word_add (fsg, lower); fsg_model_trans_add (fsg, state_num, state_num + 1, 1.0, wid); ++state_num; g_free(lower); } // null transition to exit state fsg_model_null_trans_add (fsg, state_num, 1, 0); return state_num; } static gboolean hud_sphinx_build_grammar (HudSphinx *self, GList *items, fsg_model_t **fsg, GError **error) { PronounceDict *dict = pronounce_dict_get_sphinx(error); if (dict == NULL) { return FALSE; } /* Get the pronounciations for the items */ GHashTable *pronounciations = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, (GDestroyNotify) g_strfreev); GPtrArray *command_list = g_ptr_array_new_with_free_func (free_func); GHashTable *unique_commands = g_hash_table_new(g_str_hash, g_str_equal); HudItemPronunciationData pronounciation_data = { pronounciations, self->alphanumeric_regex, command_list, dict, unique_commands }; g_list_foreach (items, (GFunc) hud_item_insert_pronounciation, &pronounciation_data); g_hash_table_destroy(unique_commands); if (command_list->len == 0) { g_set_error_literal (error, hud_sphinx_error_quark(), HUD_VOICE_HUD_STATE_ERROR, "Could not build Sphinx grammar. Is sphinx-voxforge installed?"); g_clear_pointer(&pronounciations, g_hash_table_destroy); g_ptr_array_free (command_list, TRUE); return FALSE; } gint number_of_states = hud_sphinx_number_of_states(command_list); gfloat command_probability = 1.0f / command_list->len; g_debug("Number of states [%d]", number_of_states); *fsg = fsg_model_init ("", ps_get_logmath (self->ps), cmd_ln_float32_r(self->config, "-lw"), number_of_states); (*fsg)->start_state = 0; (*fsg)->final_state = 1; // starting at state 2 (0 is start and 1 is exit) gint state_num = 1; guint i; for (i = 0; i < command_list->len; ++i) { GPtrArray *command = g_ptr_array_index(command_list, i); // keep a record of the number of states so far state_num = hud_sphinx_write_command(*fsg, command, state_num, command_probability); } glist_t nulls = fsg_model_null_trans_closure (*fsg, NULL ); glist_free (nulls); g_clear_pointer(&pronounciations, g_hash_table_destroy); g_ptr_array_free (command_list, TRUE); return TRUE; } static gboolean hud_sphinx_voice_query (HudVoice *voice, HudSource *source, gchar **result, GError **error) { g_return_val_if_fail(HUD_IS_SPHINX(voice), FALSE); HudSphinx *self = HUD_SPHINX(voice); if (source == NULL) { /* No active window, that's fine, but we'll just move on */ *result = NULL; g_set_error_literal (error, hud_sphinx_error_quark(), HUD_VOICE_HUD_STATE_ERROR, "Active source is null"); return FALSE; } GList *items = hud_source_get_items(source); if (items == NULL) { /* The active window doesn't have items, that's cool. We'll move on. */ *result = NULL; return TRUE; } fsg_model_t *fsg = NULL; if (!hud_sphinx_build_grammar(self, items, &fsg, error)) { g_list_free_full(items, g_object_unref); return FALSE; } gboolean success = hud_sphinx_listen (self, fsg, result, error); g_list_free_full(items, g_object_unref); return success; }