2
* baratinoo.c - Speech Dispatcher backend for Baratinoo (VoxyGen)
4
* Copyright (C) 2016 Brailcom, o.p.s.
6
* This is free software; you can redistribute it and/or modify it
7
* under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 2, or (at your option)
11
* This software is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
* General Public License for more details.
16
* You should have received a copy of the GNU General Public License
17
* along with this package; see the file COPYING. If not, write to
18
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19
* Boston, MA 02110-1301, USA.
23
* Input and output choices.
25
* - The input is sent to the engine through a BCinputTextBuffer. There is
26
* a single one of those at any given time, and it is filled in
27
* module_speak() and consumed in the synthesis thread.
29
* This doesn't use an input callback generating a continuous flow (and
30
* blocking waiting for more data) even though it would be a fairly nice
31
* design and would avoid setting speech attributes like volume, pitch and
32
* rate as often. This is because the Baratinoo engine has 2 limitations on
35
* * It consumes everything (or at least a lot) up until the callback
36
* reports the input end by returning 0. Alternatively one could use the
37
* \flush command followed by a newline, so this is not really limiting.
39
* * More problematic, as the buffer callback is expected to feed a single
40
* input, calling BCpurge() (for handling stop events) unregisters it,
41
* so it has to be re-added afterward. This renders the continuous flow a
42
* lot less useful, as speech attributes like volume, pitch and rate would
43
* have to be set again.
45
* - The output uses the signal buffer instead of callback.
46
* The output callback sends sound to the output module phoneme by
47
* phoneme, which causes parasitic noise with ALSA due to a reset of
48
* parameters for each sound call.
55
#include <semaphore.h>
57
#define BARATINOO_C_API
58
#include "baratinoo.h"
59
#include "baratinooio.h"
61
#include "spd_audio.h"
63
#include <speechd_types.h>
65
#include "module_utils.h"
67
#define MODULE_NAME "baratinoo"
68
#define DBG_MODNAME "Baratinoo: "
69
#define MODULE_VERSION "0.1"
71
#define DEBUG_MODULE 1
75
/* Thread primitives */
80
/* The buffer consumed by the TTS engine. It is NULL when the TTS
81
* thread is ready to accept new input. Otherwise, the thread is in
82
* the process of synthesizing speech. */
83
BCinputTextBuffer buffer;
84
/* The output signal */
85
BCoutputSignalBuffer output_signal;
87
SPDVoice **voice_list;
93
gboolean pause_requested;
94
gboolean stop_requested;
95
gboolean close_requested;
98
/* engine and state */
99
static Engine baratinoo_engine = {
102
.output_signal = NULL,
105
.pause_requested = FALSE,
106
.stop_requested = FALSE,
107
.close_requested = FALSE
110
/* Internal functions prototypes */
111
static void *_baratinoo_speak(void *);
112
static SPDVoice **baratinoo_list_voices(BCengine *engine);
114
static void baratinoo_set_voice_type(SPDVoiceType voice);
115
static void baratinoo_set_language(char *lang);
116
static void baratinoo_set_synthesis_voice(char *synthesis_voice);
117
/* Engine callbacks */
118
static void baratinoo_trace_cb(BaratinooTraceLevel level, int engine_num, const char *source, const void *data, const char *format, va_list args);
119
static int baratinoo_output_signal(void *privateData, const void *address, int length);
120
/* SSML conversion functions */
121
static void append_ssml_as_proprietary(const Engine *engine, GString *buf, const char *data, gsize size);
123
/* Module configuration options */
124
MOD_OPTION_1_STR(BaratinooConfigPath);
125
MOD_OPTION_1_INT(BaratinooSampleRate);
126
MOD_OPTION_1_INT(BaratinooMinRate);
127
MOD_OPTION_1_INT(BaratinooNormalRate);
128
MOD_OPTION_1_INT(BaratinooMaxRate);
129
MOD_OPTION_1_STR(BaratinooPunctuationList);
130
MOD_OPTION_1_STR(BaratinooIntonationList);
132
/* Public functions */
134
int module_load(void)
136
const char *conf_env;
137
char *default_config = NULL;
139
INIT_SETTINGS_TABLES();
143
/* BaratinooConfigPath default value comes from the environment or
144
* user XDG configuration location */
145
conf_env = getenv("BARATINOO_CONFIG_PATH");
146
if (conf_env && conf_env[0] != '\0') {
147
default_config = g_strdup(conf_env);
149
default_config = g_build_filename(g_get_user_config_dir(),
150
"baratinoo.cfg", NULL);
152
MOD_OPTION_1_STR_REG(BaratinooConfigPath, default_config);
153
g_free(default_config);
155
/* Sample rate. 16000Hz is the voices default, not requiring resampling */
156
MOD_OPTION_1_INT_REG(BaratinooSampleRate, 16000);
159
MOD_OPTION_1_INT_REG(BaratinooMinRate, -100);
160
MOD_OPTION_1_INT_REG(BaratinooNormalRate, 0);
161
MOD_OPTION_1_INT_REG(BaratinooMaxRate, 100);
164
MOD_OPTION_1_STR_REG(BaratinooPunctuationList, "@/+-_");
165
MOD_OPTION_1_STR_REG(BaratinooIntonationList, "?!;:,.");
170
int module_init(char **status_info)
172
Engine *engine = &baratinoo_engine;
174
BARATINOOC_STATE state;
176
DBG(DBG_MODNAME "Module init");
177
INIT_INDEX_MARKING();
179
DBG(DBG_MODNAME "BaratinooPunctuationList = %s", BaratinooPunctuationList);
180
DBG(DBG_MODNAME "BaratinooIntonationList = %s", BaratinooIntonationList);
184
engine->pause_requested = FALSE;
185
engine->stop_requested = FALSE;
186
engine->close_requested = FALSE;
189
if (BCinitlib(baratinoo_trace_cb) != BARATINOO_INIT_OK) {
190
DBG(DBG_MODNAME "Failed to initialize library");
191
*status_info = g_strdup("Failed to initialize Baratinoo. "
192
"Make sure your installation is "
196
DBG(DBG_MODNAME "Using Baratinoo %s", BCgetBaratinooVersion());
198
engine->engine = BCnew(NULL);
199
if (!engine->engine) {
200
DBG(DBG_MODNAME "Failed to allocate engine");
201
*status_info = g_strdup("Failed to create Baratinoo engine.");
205
BCinit(engine->engine, BaratinooConfigPath);
206
state = BCgetState(engine->engine);
207
if (state != BARATINOO_INITIALIZED) {
208
DBG(DBG_MODNAME "Failed to initialize engine");
209
*status_info = g_strdup("Failed to initialize Baratinoo engine. "
210
"Make sure your setup is OK.");
215
engine->voice_list = baratinoo_list_voices(engine->engine);
216
if (!engine->voice_list) {
217
DBG(DBG_MODNAME "No voice available");
218
*status_info = g_strdup("No voice found. Make sure your setup "
219
"includes at least one voice.");
223
/* Setup output (audio) signal handling */
224
DBG(DBG_MODNAME "Using PCM output at %dHz", BaratinooSampleRate);
225
engine->output_signal = BCoutputSignalBufferNew(BARATINOO_PCM, BaratinooSampleRate);
226
if (!engine->output_signal) {
227
DBG(DBG_MODNAME "Cannot allocate BCoutputSignalBufferNew");
230
if (BCgetState(engine->engine) != BARATINOO_INITIALIZED) {
231
DBG(DBG_MODNAME "Failed to initialize output signal handler");
232
*status_info = g_strdup("Failed to initialize Baratinoo output "
233
"signal handler. Is the configured "
234
"sample rate correct?");
237
BCoutputTextBufferSetInEngine(engine->output_signal, engine->engine);
239
BCsetWantedEvent(engine->engine, BARATINOO_MARKER_EVENT);
241
/* Setup TTS thread */
242
sem_init(&engine->semaphore, 0, 0);
244
DBG(DBG_MODNAME "creating new thread for baratinoo_speak");
245
ret = pthread_create(&engine->thread, NULL, _baratinoo_speak, engine);
247
DBG(DBG_MODNAME "thread creation failed");
249
g_strdup("The module couldn't initialize threads. "
250
"This could be either an internal problem or an "
251
"architecture problem. If you are sure your architecture "
252
"supports threads, please report a bug.");
256
DBG(DBG_MODNAME "Initialization successfully.");
257
*status_info = g_strdup("Baratinoo initialized successfully.");
262
SPDVoice **module_list_voices(void)
264
Engine *engine = &baratinoo_engine;
266
return engine->voice_list;
269
int module_speak(gchar *data, size_t bytes, SPDMessageType msgtype)
271
Engine *engine = &baratinoo_engine;
272
GString *buffer = NULL;
275
DBG(DBG_MODNAME "Speech requested");
277
assert(msg_settings.rate >= -100 && msg_settings.rate <= +100);
278
assert(msg_settings.pitch >= -100 && msg_settings.pitch <= +100);
279
assert(msg_settings.pitch_range >= -100 && msg_settings.pitch_range <= +100);
280
assert(msg_settings.volume >= -100 && msg_settings.volume <= +100);
282
if (engine->buffer != NULL) {
283
DBG(DBG_MODNAME "WARNING: module_speak() called during speech");
287
engine->pause_requested = FALSE;
288
engine->stop_requested = FALSE;
290
/* select voice following parameters. we don't use tags for this as
291
* we need to do some computation on our end anyway and need to pass an
292
* ID when creating the buffer too */
293
/* NOTE: these functions access the engine, which wouldn't be safe if
294
* we didn't know that the thread is sleeping. But we do know it
295
* is, as @c Engine::buffer is NULL */
296
UPDATE_STRING_PARAMETER(voice.language, baratinoo_set_language);
297
UPDATE_PARAMETER(voice_type, baratinoo_set_voice_type);
298
UPDATE_STRING_PARAMETER(voice.name, baratinoo_set_synthesis_voice);
300
engine->buffer = BCinputTextBufferNew(BARATINOO_PROPRIETARY_PARSING,
301
BARATINOO_UTF8, engine->voice, 0);
302
if (!engine->buffer) {
303
DBG(DBG_MODNAME "Failed to allocate input buffer");
307
buffer = g_string_new(NULL);
309
/* Apply speech parameters */
310
if (msg_settings.rate < 0)
311
rate = BaratinooNormalRate + (BaratinooNormalRate - BaratinooMinRate) * msg_settings.rate / 100;
313
rate = BaratinooNormalRate + (BaratinooMaxRate - BaratinooNormalRate) * msg_settings.rate / 100;
316
g_string_append_printf(buffer, "\\rate{%+d%%}", rate);
318
if (msg_settings.pitch != 0 || msg_settings.pitch_range != 0) {
319
g_string_append_printf(buffer, "\\pitch{%+d%% %+d%%}",
321
msg_settings.pitch_range);
323
if (msg_settings.volume != 0) {
324
g_string_append_printf(buffer, "\\volume{%+d%%}",
325
msg_settings.volume);
329
case SPD_MSGTYPE_SPELL: /* FIXME: use \spell one day? */
330
case SPD_MSGTYPE_CHAR:
331
g_string_append(buffer, "\\sayas<{characters}");
332
append_ssml_as_proprietary(engine, buffer, data, bytes);
333
g_string_append(buffer, "\\sayas>{}");
335
default: /* FIXME: */
336
case SPD_MSGTYPE_TEXT:
337
append_ssml_as_proprietary(engine, buffer, data, bytes);
341
DBG(DBG_MODNAME "SSML input: %s", data);
342
DBG(DBG_MODNAME "Sending buffer: %s", buffer->str);
343
if (!BCinputTextBufferInit(engine->buffer, buffer->str)) {
344
DBG(DBG_MODNAME "Failed to initialize input buffer");
348
g_string_free(buffer, TRUE);
350
sem_post(&engine->semaphore);
352
DBG(DBG_MODNAME "leaving module_speak() normally");
357
g_string_free(buffer, TRUE);
358
if (engine->buffer) {
359
BCinputTextBufferDelete(engine->buffer);
360
engine->buffer = NULL;
366
int module_stop(void)
368
Engine *engine = &baratinoo_engine;
370
DBG(DBG_MODNAME "Stop requested");
371
engine->stop_requested = TRUE;
372
if (module_audio_id) {
373
DBG(DBG_MODNAME "Stopping audio currently playing.");
374
if (spd_audio_stop(module_audio_id) != 0)
375
DBG(DBG_MODNAME "spd_audio_stop() returned non-zero value.");
381
size_t module_pause(void)
383
Engine *engine = &baratinoo_engine;
385
DBG(DBG_MODNAME "Pause requested");
386
engine->pause_requested = TRUE;
391
int module_close(void)
393
Engine *engine = &baratinoo_engine;
395
DBG(DBG_MODNAME "close()");
397
DBG(DBG_MODNAME "Terminating threads");
399
/* Politely ask the thread to terminate */
400
engine->stop_requested = TRUE;
401
engine->close_requested = TRUE;
402
sem_post(&engine->semaphore);
403
/* ...and give it a chance to actually quit. */
406
/* Make sure the thread has really exited */
407
pthread_cancel(engine->thread);
408
DBG(DBG_MODNAME "Joining threads.");
409
if (pthread_join(engine->thread, NULL) != 0)
410
DBG(DBG_MODNAME "Failed to join threads.");
412
sem_destroy(&engine->semaphore);
414
/* destroy voice list */
415
if (engine->voice_list != NULL) {
417
for (i = 0; engine->voice_list[i] != NULL; i++) {
418
g_free(engine->voice_list[i]->name);
419
g_free(engine->voice_list[i]->language);
420
g_free(engine->voice_list[i]->variant);
421
g_free(engine->voice_list[i]);
423
g_free(engine->voice_list);
424
engine->voice_list = NULL;
427
/* destroy output signal */
428
BCoutputSignalBufferDeleteSignal(engine->output_signal);
429
engine->output_signal = NULL;
432
if (engine->engine) {
433
BCdelete(engine->engine);
434
engine->engine = NULL;
440
DBG(DBG_MODNAME "Module closed.");
445
/* Internal functions */
448
* @brief Lists voices in SPD format
449
* @param engine An engine.
450
* @returns A NULL-terminated list of @c SPDVoice, or NULL if no voice found.
452
static SPDVoice **baratinoo_list_voices(BCengine *engine)
458
n_voices = BCgetNumberOfVoices(engine);
462
voices = g_malloc_n(n_voices + 1, sizeof *voices);
463
DBG(DBG_MODNAME "Got %d available voices:", n_voices);
464
for (i = 0; i < n_voices; i++) {
467
BaratinooVoiceInfo voice_info = BCgetVoiceInfo(engine, i);
469
DBG(DBG_MODNAME "\tVoice #%d: name=%s, language=%s, gender=%s",
470
i, voice_info.name, voice_info.language, voice_info.gender);
472
voice = g_malloc0(sizeof *voice);
473
voice->name = g_strdup(voice_info.name);
475
dash = strchr(voice_info.language, '-');
477
voice->language = g_strndup(voice_info.language,
478
dash - voice_info.language);
479
voice->variant = g_ascii_strdown(dash + 1, -1);
481
voice->language = g_strdup(voice_info.language);
492
* @brief Internal TTS thread.
493
* @param data An Engine structure.
496
* The TTS thread. It waits on @c Engine::semaphore to consume input data
497
* from @c Engine::buffer.
499
* @see Engine::pause_requested
500
* @see Engine::stop_requested
501
* @see Engine::close_requested
503
static void *_baratinoo_speak(void *data)
505
Engine *engine = data;
506
BARATINOOC_STATE state;
508
set_speaking_thread_parameters();
510
while (!engine->close_requested) {
511
sem_wait(&engine->semaphore);
512
DBG(DBG_MODNAME "Semaphore on");
513
engine->stop_requested = FALSE;
518
state = BCinputTextBufferSetInEngine(engine->buffer, engine->engine);
519
if (state != BARATINOO_READY) {
520
DBG(DBG_MODNAME "Failed to set input buffer");
524
module_report_event_begin();
526
if (engine->stop_requested || engine->close_requested) {
527
DBG(DBG_MODNAME "Stop in child, terminating");
528
BCinputTextBufferDelete(engine->buffer);
529
engine->buffer = NULL;
530
module_report_event_stop();
535
state = BCprocessLoop(engine->engine, -1);
536
if (state == BARATINOO_EVENT) {
537
BaratinooEvent event = BCgetEvent(engine->engine);
538
if (event.type == BARATINOO_MARKER_EVENT) {
539
DBG(DBG_MODNAME "Reached mark '%s'", event.data.marker.name);
540
module_report_index_mark((char *) event.data.marker.name);
541
/* if reached a spd mark and pausing requested, stop */
542
if (engine->pause_requested &&
543
g_str_has_prefix(event.data.marker.name, INDEX_MARK_BODY)) {
544
DBG(DBG_MODNAME "Pausing in thread");
545
state = BCpurge(engine->engine);
546
engine->pause_requested = FALSE;
547
module_report_event_pause();
550
} else if (state == BARATINOO_INPUT_ERROR ||
551
state == BARATINOO_ENGINE_ERROR) {
552
/* CANCEL would be better I guess, but
553
* that's good enough */
554
module_report_event_stop();
556
} while (state == BARATINOO_RUNNING || state == BARATINOO_EVENT);
558
BCinputTextBufferDelete(engine->buffer);
559
engine->buffer = NULL;
561
DBG(DBG_MODNAME "Trying to synthesize text");
562
if (BCoutputSignalBufferIsError(engine->output_signal) || engine->close_requested) {
563
DBG(DBG_MODNAME "Error with the output signal");
564
BCoutputSignalBufferResetSignal(engine->output_signal);
565
module_report_event_stop();
567
baratinoo_output_signal(engine, BCoutputSignalBufferGetSignalBuffer(engine->output_signal), BCoutputSignalBufferGetSignalLength(engine->output_signal));
568
BCoutputSignalBufferResetSignal(engine->output_signal);
569
if (engine->stop_requested || engine->close_requested) {
570
DBG(DBG_MODNAME "Stop in child, terminating");
571
module_report_event_stop();
573
module_report_event_end();
578
engine->stop_requested = FALSE;
581
DBG(DBG_MODNAME "leaving thread with state=%d", state);
586
/* Voice selection */
589
* @brief Matches a Baratinoo voice info against a SPD language
590
* @param info A voice info to match.
591
* @param lang A SPD language to match against.
592
* @returns The quality of the match: the higher the better.
594
* Gives a score to a voice based on its compatibility with @p lang.
596
static int lang_match_level(const BaratinooVoiceInfo *info, const char *lang)
600
if (g_ascii_strcasecmp(lang, info->language) == 0)
603
gchar **a = g_strsplit_set(info->language, "-", 2);
604
gchar **b = g_strsplit_set(lang, "-", 2);
607
if (g_ascii_strcasecmp(a[0], b[0]) == 0)
609
else if (g_ascii_strcasecmp(info->iso639, b[0]) == 0)
611
else if (g_ascii_strncasecmp(a[0], b[0], 2) == 0)
612
level += 5; /* partial match */
614
if (a[1] && b[1] && g_ascii_strcasecmp(a[1], b[1]) == 0)
616
else if (b[1] && g_ascii_strcasecmp(info->iso3166, b[1]) == 0)
618
else if (a[1] && b[1] && g_ascii_strncasecmp(a[1], b[1], 2) == 0)
619
level += 1; /* partial match */
625
DBG(DBG_MODNAME "lang_match_level({language=%s, iso639=%s, iso3166=%s}, lang=%s) = %d",
626
info->language, info->iso639, info->iso3166, lang, level);
632
* @brief Sort two Baratinoo voices by SPD criteria.
633
* @param a A voice info.
634
* @param b Another voice info.
635
* @param lang A SPD language.
636
* @param voice_code A SPD voice code.
637
* @returns < 0 if @p a is best, > 0 if @p b is best, and 0 if they are equally
638
* matching. Larger divergence from 0 means better match.
640
static int sort_voice(const BaratinooVoiceInfo *a, const BaratinooVoiceInfo *b, const char *lang, SPDVoiceType voice_code)
644
cmp -= lang_match_level(a, lang);
645
cmp += lang_match_level(b, lang);
647
if (strcmp(a->gender, b->gender) != 0) {
650
switch (voice_code) {
662
case SPD_CHILD_FEMALE:
667
if (strcmp(gender, a->gender) == 0)
669
if (strcmp(gender, b->gender) == 0)
673
switch (voice_code) {
675
case SPD_CHILD_FEMALE:
676
if (a->age && a->age <= 15)
678
if (b->age && b->age <= 15)
682
/* we expect mostly adult voices, so only compare if age is set */
683
if (a->age && b->age) {
692
DBG(DBG_MODNAME "Comparing %s <> %s gives %d", a->name, b->name, cmp);
697
/* Given a language code and SD voice code, gets the Baratinoo voice. */
698
static int baratinoo_find_voice(const Engine *engine, const char *lang, SPDVoiceType voice_code)
703
int offset = 0; /* nth voice we'd like */
704
BaratinooVoiceInfo best_info;
706
DBG(DBG_MODNAME "baratinoo_find_voice(lang=%s, voice_code=%d)",
709
switch (voice_code) {
720
for (i = 0; i < BCgetNumberOfVoices(engine->engine); i++) {
723
best_info = BCgetVoiceInfo(engine->engine, i);
726
BaratinooVoiceInfo info = BCgetVoiceInfo(engine->engine, i);
727
int cmp = sort_voice(&best_info, &info, lang, voice_code);
732
if (nth_match <= offset) {
744
/* Given a language code and SD voice code, sets the voice. */
745
static void baratinoo_set_language_and_voice(Engine *engine, const char *lang, SPDVoiceType voice_code)
747
int voice = baratinoo_find_voice(engine, lang, voice_code);
750
DBG(DBG_MODNAME "No voice match found, not changing voice.");
752
DBG(DBG_MODNAME "Best voice match is %d.", voice);
753
engine->voice = voice;
757
/* UPDATE_PARAMETER callback to set the voice type */
758
static void baratinoo_set_voice_type(SPDVoiceType voice)
760
Engine *engine = &baratinoo_engine;
762
assert(msg_settings.voice.language);
763
baratinoo_set_language_and_voice(engine, msg_settings.voice.language, voice);
766
/* UPDATE_PARAMETER callback to set the voice language */
767
static void baratinoo_set_language(char *lang)
769
Engine *engine = &baratinoo_engine;
771
baratinoo_set_language_and_voice(engine, lang, msg_settings.voice_type);
774
/* UPDATE_PARAMETER callback to set the voice by name */
775
static void baratinoo_set_synthesis_voice(char *synthesis_voice)
777
Engine *engine = &baratinoo_engine;
780
if (synthesis_voice == NULL)
783
for (i = 0; i < BCgetNumberOfVoices(engine->engine); i++) {
784
BaratinooVoiceInfo info = BCgetVoiceInfo(engine->engine, i);
786
if (g_ascii_strcasecmp(synthesis_voice, info.name) == 0) {
792
DBG(DBG_MODNAME "Failed to set synthesis voice to '%s': not found.",
796
/* Engine callbacks */
799
* @brief Logs a message from Baratinoo
800
* @param level Message importance.
801
* @param engine_num ID of the engine that emitted the message, or 0 if it is a
803
* @param source Message category.
804
* @param data Private data, unused.
805
* @param format printf-like @p format.
806
* @param args arguments for @p format.
808
static void baratinoo_trace_cb(BaratinooTraceLevel level, int engine_num, const char *source, const void *data, const char *format, va_list args)
810
const char *prefix = "";
814
case BARATINOO_TRACE_INIT:
815
case BARATINOO_TRACE_INFO:
816
case BARATINOO_TRACE_DEBUG:
824
case BARATINOO_TRACE_ERROR:
827
case BARATINOO_TRACE_INIT:
830
case BARATINOO_TRACE_WARNING:
833
case BARATINOO_TRACE_INFO:
836
case BARATINOO_TRACE_DEBUG:
842
fprintf(stderr, "Baratinoo library: ");
844
fprintf(stderr, "Baratinoo engine #%d: ", engine_num);
845
fprintf(stderr, "%s: %s ", prefix, source);
846
vfprintf(stderr, format, args);
847
fprintf(stderr, "\n");
851
* @brief Output (sound) callback
852
* @param private_data An Engine structure.
853
* @param address Audio samples.
854
* @param length Length of @p address, in bytes.
855
* @returns Whether to break out of the process loop.
857
* Called by the engine during speech synthesis.
859
* @see BCprocessLoop()
861
static int baratinoo_output_signal(void *private_data, const void *address, int length)
863
Engine *engine = private_data;
865
#if defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN)
866
AudioFormat format = SPD_AUDIO_BE;
868
AudioFormat format = SPD_AUDIO_LE;
871
/* If stop is requested during synthesis, abort here to stop speech as
872
* early as possible, even if the engine didn't finish its cycle yet. */
873
if (engine->stop_requested) {
874
DBG(DBG_MODNAME "Not playing message because it got stopped");
875
return engine->stop_requested;
878
/* We receive 16 bits PCM data */
879
track.num_samples = length / 2; /* 16 bits per sample = 2 bytes */
880
track.num_channels = 1;
881
track.sample_rate = BaratinooSampleRate;
883
track.samples = (short *) address;
885
DBG(DBG_MODNAME "Playing part of the message");
886
if (module_tts_output(track, format) < 0)
887
DBG(DBG_MODNAME "ERROR: failed to play the track");
889
return engine->stop_requested;
892
/* SSML conversion functions */
895
const Engine *engine;
897
/* Voice ID stack for the current element */
899
unsigned int voice_stack_len;
902
/* Adds a language change command for @p lang if appropriate */
903
static void ssml2baratinoo_push_lang(SsmlPraserState *state, const char *lang)
907
if (state->voice_stack_len > 0)
908
voice = state->voice_stack[state->voice_stack_len - 1];
910
voice = state->engine->voice;
913
DBG(DBG_MODNAME "Processing xml:lang=\"%s\"", lang);
914
int new_voice = baratinoo_find_voice(&baratinoo_engine, lang,
915
msg_settings.voice_type);
916
if (new_voice >= 0 && new_voice != voice) {
917
g_string_append_printf(state->buffer, "\\vox{%d}", new_voice);
922
if (state->voice_stack_len >= G_N_ELEMENTS(state->voice_stack)) {
923
DBG(DBG_MODNAME "WARNING: voice stack exhausted, expect incorrect voices.");
925
state->voice_stack[state->voice_stack_len++] = voice;
929
/* Pops a language pushed with @c ssml2baratinoo_push_lang() */
930
static void ssml2baratinoo_pop_lang(SsmlPraserState *state)
932
if (state->voice_stack_len > 0) {
933
int cur_voice = state->voice_stack[--state->voice_stack_len];
935
if (state->voice_stack_len > 0) {
936
int new_voice = state->voice_stack[state->voice_stack_len - 1];
938
if (new_voice != cur_voice)
939
g_string_append_printf(state->buffer, "\\vox{%d}", new_voice);
944
/* locates a string in a NULL-terminated array of strings
945
* Returns -1 if not found, the index otherwise. */
946
static int attribute_index(const char **names, const char *name)
950
for (i = 0; names && names[i] != NULL; i++) {
951
if (strcmp(names[i], name) == 0)
958
/* Markup element start callback */
959
static void ssml2baratinoo_start_element(GMarkupParseContext *ctx,
960
const gchar *element,
961
const gchar **attribute_names,
962
const gchar **attribute_values,
963
gpointer data, GError **error)
965
SsmlPraserState *state = data;
968
/* handle voice changes */
969
lang_id = attribute_index(attribute_names, "xml:lang");
970
ssml2baratinoo_push_lang(state, lang_id < 0 ? NULL : attribute_values[lang_id]);
972
/* handle elements */
973
if (strcmp(element, "mark") == 0) {
974
int i = attribute_index(attribute_names, "name");
975
g_string_append_printf(state->buffer, "\\mark{%s}",
976
i < 0 ? "" : attribute_values[i]);
977
} else if (strcmp(element, "emphasis") == 0) {
978
int i = attribute_index(attribute_names, "level");
979
g_string_append_printf(state->buffer, "\\emph<{%s}",
980
i < 0 ? "" : attribute_values[i]);
982
/* ignore other elements */
983
/* TODO: handle more elements */
987
/* Markup element end callback */
988
static void ssml2baratinoo_end_element(GMarkupParseContext *ctx,
989
const gchar *element,
990
gpointer data, GError **error)
992
SsmlPraserState *state = data;
994
if (strcmp(element, "emphasis") == 0) {
995
g_string_append(state->buffer, "\\emph>{}");
998
ssml2baratinoo_pop_lang(state);
1001
/* Markup text node callback.
1003
* This not only converts to the proprietary format (by escaping things that
1004
* would be interpreted by it), but also pre-processes the text for some
1005
* features that are missing from Baratinoo.
1007
* - Punctuation speaking
1009
* As the engine doesn't support speaking of the punctuation itself, we alter
1010
* the input to explicitly tell the engine to do it. It is kind of tricky,
1011
* because we want to keep the punctuation meaning of the characters, e.g. how
1012
* they affect speech as means of intonation and pauses.
1014
* The approach here is that for every punctuation character included in the
1015
* selected mode (none/some/all), we wrap it in "\sayas<{characters}" markup
1016
* so that it is spoken by the engine. But in order to keep the punctuation
1017
* meaning of the character, in case it has some, we duplicate it outside the
1018
* markup with a heuristic on whether it will or not affect speech intonation
1019
* and pauses, and whether or not the engine would speak the character itself
1020
* already (as we definitely don't want to get duplicated speech for a
1022
* This heuristic is as follows:
1023
* - If the character is listed in BaratinooIntonationList and the next
1024
* character is not punctuation or alphanumeric, duplicate the character.
1025
* - Always append a space after a duplicated character, hoping the engine
1026
* won't consider speaking it.
1028
* This won't always give the same results as the engine would by itself, but
1029
* it isn't really possible as the engine behavior is language-dependent in a
1030
* non-obvious fashion. For example, a French voice will speak "1.2.3" as
1031
* "Un. Deux. Trois", while an English one will speak it as "One dot two dot
1032
* three": the dot here didn't have the same interpretation, and wasn't spoken
1033
* the same (once altering the voice, the other spoken plain and simple).
1035
* However, the heuristic here should be highly unlikely to lead to duplicate
1036
* character speaking, and catch most of the intonation and pause cases.
1038
* - Why is this done that way?
1040
* Another, possibly more robust, approach could be using 2 passes in the
1041
* engine itself, and relying on events to get information on how the engine
1042
* interprets the input in the first (silent) pass, and alter it as needed for
1043
* a second (spoken) pass. This wouldn't guarantee the altered input would be
1044
* interpreted the same, but it would seem like a safe enough bet.
1046
* However, the engine is too slow for this to be viable in a real-time
1047
* processing environment for anything but tiny input. Even about 25 lines of
1048
* IRC conversation can easily take several seconds to process in the first
1049
* pass (even without doing any actual pre-processing on our end), delaying
1050
* the actual speech by an unacceptable amount of time.
1052
* Ideally, the engine will some day support speaking punctuation itself, and
1053
* this part of the pre-processing could be dropped.
1055
static void ssml2baratinoo_text(GMarkupParseContext *ctx,
1056
const gchar *text, gsize len,
1057
gpointer data, GError **error)
1059
SsmlPraserState *state = data;
1062
for (p = text; p < (text + len); p = g_utf8_next_char(p)) {
1064
/* escape the \ by appending a comment so it won't be
1065
* interpreted as a command */
1066
g_string_append(state->buffer, "\\\\{}");
1068
gboolean say_as_char;
1069
gunichar ch = g_utf8_get_char(p);
1071
/* if punctuation mode is not NONE and the character
1072
* should be spoken, manually wrap it with \sayas */
1073
say_as_char = ((msg_settings.punctuation_mode == SPD_PUNCT_SOME &&
1074
g_utf8_strchr(BaratinooPunctuationList, -1, ch)) ||
1075
(msg_settings.punctuation_mode == SPD_PUNCT_ALL &&
1076
g_unichar_ispunct(ch)));
1079
g_string_append(state->buffer, "\\sayas<{characters}");
1080
g_string_append_unichar(state->buffer, ch);
1082
g_string_append(state->buffer, "\\sayas>{}");
1084
/* if the character should influence intonation,
1085
* add it back, but *only* if it wouldn't be spoken */
1086
if (g_utf8_strchr(BaratinooIntonationList, -1, ch)) {
1087
const gchar *next = g_utf8_next_char(p);
1090
if (next < text + len)
1091
ch_next = g_utf8_get_char(next);
1095
if (!g_unichar_isalnum(ch_next) &&
1096
!g_unichar_ispunct(ch_next)) {
1097
g_string_append_unichar(state->buffer, ch);
1098
/* Append an extra space to try and
1099
* make sure it's considered as
1100
* punctuation and isn't spoken. */
1101
g_string_append_c(state->buffer, ' ');
1110
* @brief Converts SSML data to Baratinoo's proprietary format.
1111
* @param buf A buffer to write to.
1112
* @param data SSML data to convert.
1113
* @param size Length of @p data
1115
* @warning Only a subset of the input SSML is currently translated, the rest
1118
static void append_ssml_as_proprietary(const Engine *engine, GString *buf, const char *data, gsize size)
1120
/* FIXME: we could possibly use SSML mode, but the Baratinoo parser is
1121
* very strict and *requires* "xmlns", "version" and "lang" attributes
1122
* on the <speak> tag, which speech-dispatcher doesn't provide.
1124
* Moreover, we need to add tags for volume/rate/pitch so we'd have to
1125
* amend the data anyway. */
1126
static const GMarkupParser parser = {
1127
.start_element = ssml2baratinoo_start_element,
1128
.end_element = ssml2baratinoo_end_element,
1129
.text = ssml2baratinoo_text,
1131
SsmlPraserState state = {
1134
.voice_stack_len = 0,
1136
GMarkupParseContext *ctx;
1139
ctx = g_markup_parse_context_new(&parser, G_MARKUP_TREAT_CDATA_AS_TEXT,
1141
if (!g_markup_parse_context_parse(ctx, data, size, &err) ||
1142
!g_markup_parse_context_end_parse(ctx, &err)) {
1143
DBG(DBG_MODNAME "Failed to convert SSML: %s", err->message);
1147
g_markup_parse_context_free(ctx);