~vcs-imports/speech-dispatcher/trunk : revision 2980

1

/*

2

* baratinoo.c - Speech Dispatcher backend for Baratinoo (VoxyGen)

3

*

4

5

*

6

* This is free software; you can redistribute it and/or modify it

7

* under the terms of the GNU General Public License as published by

8

* the Free Software Foundation; either version 2, or (at your option)

9

* any later version.

10

*

11

* This software is distributed in the hope that it will be useful,

12

* but WITHOUT ANY WARRANTY; without even the implied warranty of

13

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

14

* General Public License for more details.

15

*

16

* You should have received a copy of the GNU General Public License

17

* along with this package; see the file COPYING. If not, write to

18

* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,

19

* Boston, MA 02110-1301, USA.

20

*/

21

22

/*
 * Input and output choices.
 *
 * - The input is sent to the engine through a BCinputTextBuffer.  There is
 *   a single one of those at any given time, and it is filled in
 *   module_speak() and consumed in the synthesis thread.
 *
 *   This doesn't use an input callback generating a continuous flow (and
 *   blocking waiting for more data) even though it would be a fairly nice
 *   design and would allow not to set speech attributes like volume, pitch and
 *   rate as often.  This is because the Baratinoo engine has 2 limitations on
 *   the input callback:
 *
 *   * It consumes everything (or at least a lot) up until the callback
 *     reports the input end by returning 0.  Alternatively one could use the
 *     \flush command followed by a newline, so this is not really limiting.
 *
 *   * More problematic, as the buffer callback is expected to feed a single
 *     input, calling BCpurge() (for handling stop events) unregisters it,
 *     requiring to re-add it afterward.  This renders the continuous flow a
 *     lot less useful, as speech attributes like volume, pitch and rate would
 *     have to be set again.
 *
 * - The output uses the signal buffer instead of a callback.
 *   The output callback sends sound to the output module phoneme by
 *   phoneme, which causes parasitic noise with ALSA due to a reset of
 *   parameters for each sound call.
 */

50

51

#ifdef HAVE_CONFIG_H

52

#include <config.h>

53

#endif

54

55

#include <semaphore.h>

56

57

#define BARATINOO_C_API

58

#include "baratinoo.h"

59

#include "baratinooio.h"

60

61

#include "spd_audio.h"

62

63

#include <speechd_types.h>

64

65

#include "module_utils.h"

66

67

#define MODULE_NAME "baratinoo"

68

#define DBG_MODNAME "Baratinoo: "

69

#define MODULE_VERSION "0.1"

70

71

#define DEBUG_MODULE 1

72

DECLARE_DEBUG();

73

74

typedef struct {

75

/* Thread primitives */

76

pthread_t thread;

77

sem_t semaphore;

78

79

BCengine engine;

80

/* The buffer consumed by the TTS engine. It is NULL when the TTS

81

* thread is ready to accept new input. Otherwise, the thread is in

82

* the process of synthesizing speech. */

83

BCinputTextBuffer buffer;

84

/* The output signal */

85

BCoutputSignalBuffer output_signal;

86

87

SPDVoice **voice_list;

88

89

/* settings */

90

int voice;

91

92

/* request flags */

93

gboolean pause_requested;

94

gboolean stop_requested;

95

gboolean close_requested;

96

} Engine;

97

98

/* engine and state */

99

static Engine baratinoo_engine = {

100

.engine = NULL,

101

.buffer = NULL,

102

.output_signal = NULL,

103

.voice_list = NULL,

104

.voice = 0,

105

.pause_requested = FALSE,

106

.stop_requested = FALSE,

107

.close_requested = FALSE

108

};

109

110

/* Internal functions prototypes */

111

static void *_baratinoo_speak(void *);

112

static SPDVoice **baratinoo_list_voices(BCengine *engine);

113

/* Parameters */

114

static void baratinoo_set_voice_type(SPDVoiceType voice);

115

static void baratinoo_set_language(char *lang);

116

static void baratinoo_set_synthesis_voice(char *synthesis_voice);

117

/* Engine callbacks */

118

static void baratinoo_trace_cb(BaratinooTraceLevel level, int engine_num, const char *source, const void *data, const char *format, va_list args);

119

static int baratinoo_output_signal(void *privateData, const void *address, int length);

120

/* SSML conversion functions */

121

static void append_ssml_as_proprietary(const Engine *engine, GString *buf, const char *data, gsize size);

122

123

/* Module configuration options */

124

MOD_OPTION_1_STR(BaratinooConfigPath);

125

MOD_OPTION_1_INT(BaratinooSampleRate);

126

MOD_OPTION_1_INT(BaratinooMinRate);

127

MOD_OPTION_1_INT(BaratinooNormalRate);

128

MOD_OPTION_1_INT(BaratinooMaxRate);

129

MOD_OPTION_1_STR(BaratinooPunctuationList);

130

MOD_OPTION_1_STR(BaratinooIntonationList);

131

132

/* Public functions */

133

134

int module_load(void)

135

{

136

const char *conf_env;

137

char *default_config = NULL;

138

139

INIT_SETTINGS_TABLES();

140

141

REGISTER_DEBUG();

142

143

/* BaratinooConfigPath default value comes from the environment or

144

* user XDG configuration location */

145

conf_env = getenv("BARATINOO_CONFIG_PATH");

146

if (conf_env && conf_env[0] != '\0') {

147

default_config = g_strdup(conf_env);

148

} else {

149

default_config = g_build_filename(g_get_user_config_dir(),

150

"baratinoo.cfg", NULL);

151

}

152

MOD_OPTION_1_STR_REG(BaratinooConfigPath, default_config);

153

g_free(default_config);

154

155

/* Sample rate. 16000Hz is the voices default, not requiring resampling */

156

MOD_OPTION_1_INT_REG(BaratinooSampleRate, 16000);

157

158

/* Speech rate */

159

MOD_OPTION_1_INT_REG(BaratinooMinRate, -100);

160

MOD_OPTION_1_INT_REG(BaratinooNormalRate, 0);

161

MOD_OPTION_1_INT_REG(BaratinooMaxRate, 100);

162

163

/* Punctuation */

164

MOD_OPTION_1_STR_REG(BaratinooPunctuationList, "@/+-_");

165

MOD_OPTION_1_STR_REG(BaratinooIntonationList, "?!;:,.");

166

167

return 0;

168

}

169

170

int module_init(char **status_info)

171

{

172

Engine *engine = &baratinoo_engine;

173

int ret;

174

BARATINOOC_STATE state;

175

176

DBG(DBG_MODNAME "Module init");

177

INIT_INDEX_MARKING();

178

179

DBG(DBG_MODNAME "BaratinooPunctuationList = %s", BaratinooPunctuationList);

180

DBG(DBG_MODNAME "BaratinooIntonationList = %s", BaratinooIntonationList);

181

182

*status_info = NULL;

183

184

engine->pause_requested = FALSE;

185

engine->stop_requested = FALSE;

186

engine->close_requested = FALSE;

187

188

/* Init Baratinoo */

189

if (BCinitlib(baratinoo_trace_cb) != BARATINOO_INIT_OK) {

190

DBG(DBG_MODNAME "Failed to initialize library");

191

*status_info = g_strdup("Failed to initialize Baratinoo. "

192

"Make sure your installation is "

193

"properly set up.");

194

return -1;

195

}

196

DBG(DBG_MODNAME "Using Baratinoo %s", BCgetBaratinooVersion());

197

198

engine->engine = BCnew(NULL);

199

if (!engine->engine) {

200

DBG(DBG_MODNAME "Failed to allocate engine");

201

*status_info = g_strdup("Failed to create Baratinoo engine.");

202

return -1;

203

}

204

205

BCinit(engine->engine, BaratinooConfigPath);

206

state = BCgetState(engine->engine);

207

if (state != BARATINOO_INITIALIZED) {

208

DBG(DBG_MODNAME "Failed to initialize engine");

209

*status_info = g_strdup("Failed to initialize Baratinoo engine. "

210

"Make sure your setup is OK.");

211

return -1;

212

}

213

214

/* Find voices */

215

engine->voice_list = baratinoo_list_voices(engine->engine);

216

if (!engine->voice_list) {

217

DBG(DBG_MODNAME "No voice available");

218

*status_info = g_strdup("No voice found. Make sure your setup "

219

"includes at least one voice.");

220

return -1;

221

}

222

223

/* Setup output (audio) signal handling */

224

DBG(DBG_MODNAME "Using PCM output at %dHz", BaratinooSampleRate);

225

engine->output_signal = BCoutputSignalBufferNew(BARATINOO_PCM, BaratinooSampleRate);

226

if (!engine->output_signal) {

227

DBG(DBG_MODNAME "Cannot allocate BCoutputSignalBufferNew");

228

return -1;

229

}

230

if (BCgetState(engine->engine) != BARATINOO_INITIALIZED) {

231

DBG(DBG_MODNAME "Failed to initialize output signal handler");

232

*status_info = g_strdup("Failed to initialize Baratinoo output "

233

"signal handler. Is the configured "

234

"sample rate correct?");

235

return -1;

236

}

237

BCoutputTextBufferSetInEngine(engine->output_signal, engine->engine);

238

239

BCsetWantedEvent(engine->engine, BARATINOO_MARKER_EVENT);

240

241

/* Setup TTS thread */

242

sem_init(&engine->semaphore, 0, 0);

243

244

DBG(DBG_MODNAME "creating new thread for baratinoo_speak");

245

ret = pthread_create(&engine->thread, NULL, _baratinoo_speak, engine);

246

if (ret != 0) {

247

DBG(DBG_MODNAME "thread creation failed");

248

*status_info =

249

g_strdup("The module couldn't initialize threads. "

250

"This could be either an internal problem or an "

251

"architecture problem. If you are sure your architecture "

252

"supports threads, please report a bug.");

253

return -1;

254

}

255

256

DBG(DBG_MODNAME "Initialization successfully.");

257

*status_info = g_strdup("Baratinoo initialized successfully.");

258

259

return 0;

260

}

261

262

SPDVoice **module_list_voices(void)

263

{

264

Engine *engine = &baratinoo_engine;

265

266

return engine->voice_list;

267

}

268

269

/*
 * Queues a message for synthesis.
 *
 * Converts the SSML input to Baratinoo proprietary markup (prefixed with the
 * rate, pitch and volume commands), stores it in Engine::buffer and wakes up
 * the synthesis thread.
 *
 * @param data    SSML message.
 * @param bytes   Size of @p data.
 * @param msgtype Kind of message; SPELL and CHAR are spoken as characters.
 * @returns @p bytes when the message was queued, 0 when it was rejected
 *          (synthesis already pending) or when the input buffer could not
 *          be set up.
 */
int module_speak(gchar *data, size_t bytes, SPDMessageType msgtype)
{
	Engine *engine = &baratinoo_engine;
	GString *markup;
	gboolean as_characters;
	int rate_span;
	int rate;

	DBG(DBG_MODNAME "Speech requested");

	assert(msg_settings.rate >= -100 && msg_settings.rate <= +100);
	assert(msg_settings.pitch >= -100 && msg_settings.pitch <= +100);
	assert(msg_settings.pitch_range >= -100 && msg_settings.pitch_range <= +100);
	assert(msg_settings.volume >= -100 && msg_settings.volume <= +100);

	if (engine->buffer != NULL) {
		DBG(DBG_MODNAME "WARNING: module_speak() called during speech");
		return 0;
	}

	engine->pause_requested = FALSE;
	engine->stop_requested = FALSE;

	/* Voice selection is done here rather than through tags, because
	 * some computation is needed on our side anyway and the voice ID has
	 * to be given when creating the buffer. */
	/* NOTE: the setters below access the engine.  That is only safe
	 * because the thread is known to be sleeping, since
	 * @c Engine::buffer is NULL. */
	UPDATE_STRING_PARAMETER(voice.language, baratinoo_set_language);
	UPDATE_PARAMETER(voice_type, baratinoo_set_voice_type);
	UPDATE_STRING_PARAMETER(voice.name, baratinoo_set_synthesis_voice);

	engine->buffer = BCinputTextBufferNew(BARATINOO_PROPRIETARY_PARSING,
					      BARATINOO_UTF8, engine->voice, 0);
	if (!engine->buffer) {
		DBG(DBG_MODNAME "Failed to allocate input buffer");
		return 0;
	}

	markup = g_string_new(NULL);

	/* Map the SPD rate (-100..+100) onto the configured range, using the
	 * lower or the upper half depending on its sign. */
	rate_span = (msg_settings.rate < 0)
		? BaratinooNormalRate - BaratinooMinRate
		: BaratinooMaxRate - BaratinooNormalRate;
	rate = BaratinooNormalRate + rate_span * msg_settings.rate / 100;

	/* Only emit the commands that differ from the neutral settings */
	if (rate != 0)
		g_string_append_printf(markup, "\\rate{%+d%%}", rate);
	if (msg_settings.pitch != 0 || msg_settings.pitch_range != 0)
		g_string_append_printf(markup, "\\pitch{%+d%% %+d%%}",
				       msg_settings.pitch,
				       msg_settings.pitch_range);
	if (msg_settings.volume != 0)
		g_string_append_printf(markup, "\\volume{%+d%%}",
				       msg_settings.volume);

	/* FIXME: use \spell one day for SPD_MSGTYPE_SPELL? */
	/* FIXME: every other message type is handled like plain text */
	as_characters = (msgtype == SPD_MSGTYPE_SPELL ||
			 msgtype == SPD_MSGTYPE_CHAR);
	if (as_characters)
		g_string_append(markup, "\\sayas<{characters}");
	append_ssml_as_proprietary(engine, markup, data, bytes);
	if (as_characters)
		g_string_append(markup, "\\sayas>{}");

	DBG(DBG_MODNAME "SSML input: %s", data);
	DBG(DBG_MODNAME "Sending buffer: %s", markup->str);
	if (!BCinputTextBufferInit(engine->buffer, markup->str)) {
		DBG(DBG_MODNAME "Failed to initialize input buffer");
		g_string_free(markup, TRUE);
		/* Drop the buffer so later requests are accepted again */
		BCinputTextBufferDelete(engine->buffer);
		engine->buffer = NULL;
		return 0;
	}

	g_string_free(markup, TRUE);

	/* Wake the synthesis thread up */
	sem_post(&engine->semaphore);

	DBG(DBG_MODNAME "leaving module_speak() normally");
	return bytes;
}

365

366

int module_stop(void)

367

{

368

Engine *engine = &baratinoo_engine;

369

370

DBG(DBG_MODNAME "Stop requested");

371

engine->stop_requested = TRUE;

372

if (module_audio_id) {

373

DBG(DBG_MODNAME "Stopping audio currently playing.");

374

if (spd_audio_stop(module_audio_id) != 0)

375

DBG(DBG_MODNAME "spd_audio_stop() returned non-zero value.");

376

}

377

378

return 0;

379

}

380

381

size_t module_pause(void)

382

{

383

Engine *engine = &baratinoo_engine;

384

385

DBG(DBG_MODNAME "Pause requested");

386

engine->pause_requested = TRUE;

387

388

return 0;

389

}

390

391

int module_close(void)

392

{

393

Engine *engine = &baratinoo_engine;

394

395

DBG(DBG_MODNAME "close()");

396

397

DBG(DBG_MODNAME "Terminating threads");

398

399

/* Politely ask the thread to terminate */

400

engine->stop_requested = TRUE;

401

engine->close_requested = TRUE;

402

sem_post(&engine->semaphore);

403

/* ...and give it a chance to actually quit. */

404

g_usleep(25000);

405

406

/* Make sure the thread has really exited */

407

pthread_cancel(engine->thread);

408

DBG(DBG_MODNAME "Joining threads.");

409

if (pthread_join(engine->thread, NULL) != 0)

410

DBG(DBG_MODNAME "Failed to join threads.");

411

412

sem_destroy(&engine->semaphore);

413

414

/* destroy voice list */

415

if (engine->voice_list != NULL) {

416

int i;

417

for (i = 0; engine->voice_list[i] != NULL; i++) {

418

g_free(engine->voice_list[i]->name);

419

g_free(engine->voice_list[i]->language);

420

g_free(engine->voice_list[i]->variant);

421

g_free(engine->voice_list[i]);

422

}

423

g_free(engine->voice_list);

424

engine->voice_list = NULL;

425

}

426

427

/* destroy output signal */

428

BCoutputSignalBufferDeleteSignal(engine->output_signal);

429

engine->output_signal = NULL;

430

431

/* destroy engine */

432

if (engine->engine) {

433

BCdelete(engine->engine);

434

engine->engine = NULL;

435

}

436

437

/* uninitialize */

438

BCterminatelib();

439

440

DBG(DBG_MODNAME "Module closed.");

441

442

return 0;

443

}

444

445

/* Internal functions */

446

447

/**

448

* @brief Lists voices in SPD format

449

* @param engine An engine.

450

* @returns A NULL-terminated list of @c SPDVoice, or NULL if no voice found.

451

*/

452

static SPDVoice **baratinoo_list_voices(BCengine *engine)

453

{

454

SPDVoice **voices;

455

int n_voices;

456

int i;

457

458

n_voices = BCgetNumberOfVoices(engine);

459

if (n_voices < 1)

460

return NULL;

461

462

voices = g_malloc_n(n_voices + 1, sizeof *voices);

463

DBG(DBG_MODNAME "Got %d available voices:", n_voices);

464

for (i = 0; i < n_voices; i++) {

465

SPDVoice *voice;

466

const char *dash;

467

BaratinooVoiceInfo voice_info = BCgetVoiceInfo(engine, i);

468

469

DBG(DBG_MODNAME "\tVoice #%d: name=%s, language=%s, gender=%s",

470

i, voice_info.name, voice_info.language, voice_info.gender);

471

472

voice = g_malloc0(sizeof *voice);

473

voice->name = g_strdup(voice_info.name);

474

475

dash = strchr(voice_info.language, '-');

476

if (dash) {

477

voice->language = g_strndup(voice_info.language,

478

dash - voice_info.language);

479

voice->variant = g_ascii_strdown(dash + 1, -1);

480

} else {

481

voice->language = g_strdup(voice_info.language);

482

}

483

484

voices[i] = voice;

485

}

486

voices[i] = NULL;

487

488

return voices;

489

}

490

491

/**

492

* @brief Internal TTS thread.

493

* @param data An Engine structure.

494

* @returns NULL.

495

*

496

* The TTS thread. It waits on @c Engine::semaphore to consume input data

497

* from @c Engine::buffer.

498

*

499

* @see Engine::pause_requested

500

* @see Engine::stop_requested

501

* @see Engine::close_requested

502

*/

503

static void *_baratinoo_speak(void *data)

504

{

505

Engine *engine = data;

506

BARATINOOC_STATE state;

507

508

set_speaking_thread_parameters();

509

510

while (!engine->close_requested) {

511

sem_wait(&engine->semaphore);

512

DBG(DBG_MODNAME "Semaphore on");

513

engine->stop_requested = FALSE;

514

515

if (!engine->buffer)

516

continue;

517

518

state = BCinputTextBufferSetInEngine(engine->buffer, engine->engine);

519

if (state != BARATINOO_READY) {

520

DBG(DBG_MODNAME "Failed to set input buffer");

521

continue;

522

}

523

524

module_report_event_begin();

525

while (1) {

526

if (engine->stop_requested || engine->close_requested) {

527

DBG(DBG_MODNAME "Stop in child, terminating");

528

BCinputTextBufferDelete(engine->buffer);

529

engine->buffer = NULL;

530

module_report_event_stop();

531

break;

532

}

533

534

do {

535

state = BCprocessLoop(engine->engine, -1);

536

if (state == BARATINOO_EVENT) {

537

BaratinooEvent event = BCgetEvent(engine->engine);

538

if (event.type == BARATINOO_MARKER_EVENT) {

539

DBG(DBG_MODNAME "Reached mark '%s'", event.data.marker.name);

540

module_report_index_mark((char *) event.data.marker.name);

541

/* if reached a spd mark and pausing requested, stop */

542

if (engine->pause_requested &&

543

g_str_has_prefix(event.data.marker.name, INDEX_MARK_BODY)) {

544

DBG(DBG_MODNAME "Pausing in thread");

545

state = BCpurge(engine->engine);

546

engine->pause_requested = FALSE;

547

module_report_event_pause();

548

}

549

}

550

} else if (state == BARATINOO_INPUT_ERROR ||

551

state == BARATINOO_ENGINE_ERROR) {

552

/* CANCEL would be better I guess, but

553

* that's good enough */

554

module_report_event_stop();

555

}

556

} while (state == BARATINOO_RUNNING || state == BARATINOO_EVENT);

557

558

BCinputTextBufferDelete(engine->buffer);

559

engine->buffer = NULL;

560

561

DBG(DBG_MODNAME "Trying to synthesize text");

562

if (BCoutputSignalBufferIsError(engine->output_signal) || engine->close_requested) {

563

DBG(DBG_MODNAME "Error with the output signal");

564

BCoutputSignalBufferResetSignal(engine->output_signal);

565

module_report_event_stop();

566

} else {

567

baratinoo_output_signal(engine, BCoutputSignalBufferGetSignalBuffer(engine->output_signal), BCoutputSignalBufferGetSignalLength(engine->output_signal));

568

BCoutputSignalBufferResetSignal(engine->output_signal);

569

if (engine->stop_requested || engine->close_requested) {

570

DBG(DBG_MODNAME "Stop in child, terminating");

571

module_report_event_stop();

572

} else {

573

module_report_event_end();

574

}

575

}

576

break;

577

}

578

engine->stop_requested = FALSE;

579

}

580

581

DBG(DBG_MODNAME "leaving thread with state=%d", state);

582

583

pthread_exit(NULL);

584

}

585

586

/* Voice selection */

587

588

/**

589

* @brief Matches a Baratinoo voice info against a SPD language

590

* @param info A voice info to match.

591

* @param lang A SPD language to match against.

592

* @returns The quality of the match: the higher the better.

593

*

594

* Gives a score to a voice based on its compatibility with @p lang.

595

*/

596

static int lang_match_level(const BaratinooVoiceInfo *info, const char *lang)

597

{

598

int level = 0;

599

600

if (g_ascii_strcasecmp(lang, info->language) == 0)

601

level += 10;

602

else {

603

gchar **a = g_strsplit_set(info->language, "-", 2);

604

gchar **b = g_strsplit_set(lang, "-", 2);

605

606

/* language */

607

if (g_ascii_strcasecmp(a[0], b[0]) == 0)

608

level += 8;

609

else if (g_ascii_strcasecmp(info->iso639, b[0]) == 0)

610

level += 8;

611

else if (g_ascii_strncasecmp(a[0], b[0], 2) == 0)

612

level += 5; /* partial match */

613

/* region */

614

if (a[1] && b[1] && g_ascii_strcasecmp(a[1], b[1]) == 0)

615

level += 2;

616

else if (b[1] && g_ascii_strcasecmp(info->iso3166, b[1]) == 0)

617

level += 2;

618

else if (a[1] && b[1] && g_ascii_strncasecmp(a[1], b[1], 2) == 0)

619

level += 1; /* partial match */

620

621

g_strfreev(a);

622

g_strfreev(b);

623

}

624

625

DBG(DBG_MODNAME "lang_match_level({language=%s, iso639=%s, iso3166=%s}, lang=%s) = %d",

626

info->language, info->iso639, info->iso3166, lang, level);

627

628

return level;

629

}

630

631

/**

632

* @brief Sort two Baratinoo voices by SPD criteria.

633

* @param a A voice info.

634

* @param b Another voice info.

635

* @param lang A SPD language.

636

* @param voice_code A SPD voice code.

637

* @returns < 0 if @p a is best, > 0 if @p b is best, and 0 if they are equally

638

* matching. Larger divergence from 0 means better match.

639

*/

640

static int sort_voice(const BaratinooVoiceInfo *a, const BaratinooVoiceInfo *b, const char *lang, SPDVoiceType voice_code)

641

{

642

int cmp = 0;

643

644

cmp -= lang_match_level(a, lang);

645

cmp += lang_match_level(b, lang);

646

647

if (strcmp(a->gender, b->gender) != 0) {

648

const char *gender;

649

650

switch (voice_code) {

651

default:

652

case SPD_MALE1:

653

case SPD_MALE2:

654

case SPD_MALE3:

655

case SPD_CHILD_MALE:

656

gender = "male";

657

break;

658

659

case SPD_FEMALE1:

660

case SPD_FEMALE2:

661

case SPD_FEMALE3:

662

case SPD_CHILD_FEMALE:

663

gender = "female";

664

break;

665

}

666

667

if (strcmp(gender, a->gender) == 0)

668

cmp--;

669

if (strcmp(gender, b->gender) == 0)

670

cmp++;

671

}

672

673

switch (voice_code) {

674

case SPD_CHILD_MALE:

675

case SPD_CHILD_FEMALE:

676

if (a->age && a->age <= 15)

677

cmp--;

678

if (b->age && b->age <= 15)

679

cmp++;

680

break;

681

default:

682

/* we expect mostly adult voices, so only compare if age is set */

683

if (a->age && b->age) {

684

if (a->age > 15)

685

cmp--;

686

if (b->age > 15)

687

cmp++;

688

}

689

break;

690

}

691

692

DBG(DBG_MODNAME "Comparing %s <> %s gives %d", a->name, b->name, cmp);

693

694

return cmp;

695

}

696

697

/* Given a language code and SD voice code, gets the Baratinoo voice. */

698

static int baratinoo_find_voice(const Engine *engine, const char *lang, SPDVoiceType voice_code)

699

{

700

int i;

701

int best_match = -1;

702

int nth_match = 0;

703

int offset = 0; /* nth voice we'd like */

704

BaratinooVoiceInfo best_info;

705

706

DBG(DBG_MODNAME "baratinoo_find_voice(lang=%s, voice_code=%d)",

707

lang, voice_code);

708

709

switch (voice_code) {

710

case SPD_MALE3:

711

case SPD_FEMALE3:

712

offset++;

713

case SPD_MALE2:

714

case SPD_FEMALE2:

715

offset++;

716

default:

717

break;

718

}

719

720

for (i = 0; i < BCgetNumberOfVoices(engine->engine); i++) {

721

if (i == 0) {

722

best_match = i;

723

best_info = BCgetVoiceInfo(engine->engine, i);

724

nth_match++;

725

} else {

726

BaratinooVoiceInfo info = BCgetVoiceInfo(engine->engine, i);

727

int cmp = sort_voice(&best_info, &info, lang, voice_code);

728

729

if (cmp >= 0) {

730

if (cmp > 0)

731

nth_match = 0;

732

if (nth_match <= offset) {

733

best_match = i;

734

best_info = info;

735

}

736

nth_match++;

737

}

738

}

739

}

740

741

return best_match;

742

}

743

744

/* Given a language code and SD voice code, sets the voice. */

745

static void baratinoo_set_language_and_voice(Engine *engine, const char *lang, SPDVoiceType voice_code)

746

{

747

int voice = baratinoo_find_voice(engine, lang, voice_code);

748

749

if (voice < 0) {

750

DBG(DBG_MODNAME "No voice match found, not changing voice.");

751

} else {

752

DBG(DBG_MODNAME "Best voice match is %d.", voice);

753

engine->voice = voice;

754

}

755

}

756

757

/* UPDATE_PARAMETER callback to set the voice type */

758

static void baratinoo_set_voice_type(SPDVoiceType voice)

759

{

760

Engine *engine = &baratinoo_engine;

761

762

assert(msg_settings.voice.language);

763

baratinoo_set_language_and_voice(engine, msg_settings.voice.language, voice);

764

}

765

766

/* UPDATE_PARAMETER callback to set the voice language */

767

static void baratinoo_set_language(char *lang)

768

{

769

Engine *engine = &baratinoo_engine;

770

771

baratinoo_set_language_and_voice(engine, lang, msg_settings.voice_type);

772

}

773

774

/* UPDATE_PARAMETER callback to set the voice by name */

775

static void baratinoo_set_synthesis_voice(char *synthesis_voice)

776

{

777

Engine *engine = &baratinoo_engine;

778

int i;

779

780

if (synthesis_voice == NULL)

781

return;

782

783

for (i = 0; i < BCgetNumberOfVoices(engine->engine); i++) {

784

BaratinooVoiceInfo info = BCgetVoiceInfo(engine->engine, i);

785

786

if (g_ascii_strcasecmp(synthesis_voice, info.name) == 0) {

787

engine->voice = i;

788

return;

789

}

790

}

791

792

DBG(DBG_MODNAME "Failed to set synthesis voice to '%s': not found.",

793

synthesis_voice);

794

}

795

796

/* Engine callbacks */

797

798

/**

799

* @brief Logs a message from Baratinoo

800

* @param level Message importance.

801

* @param engine_num ID of the engine that emitted the message, or 0 if it is a

802

* library message.

803

* @param source Message category.

804

* @param data Private data, unused.

805

* @param format printf-like @p format.

806

* @param args arguments for @p format.

807

*/

808

static void baratinoo_trace_cb(BaratinooTraceLevel level, int engine_num, const char *source, const void *data, const char *format, va_list args)

809

{

810

const char *prefix = "";

811

812

if (!Debug) {

813

switch (level) {

814

case BARATINOO_TRACE_INIT:

815

case BARATINOO_TRACE_INFO:

816

case BARATINOO_TRACE_DEBUG:

817

return;

818

default:

819

break;

820

}

821

}

822

823

switch (level) {

824

case BARATINOO_TRACE_ERROR:

825

prefix = "ERROR";

826

break;

827

case BARATINOO_TRACE_INIT:

828

prefix = "INIT";

829

break;

830

case BARATINOO_TRACE_WARNING:

831

prefix = "WARNING";

832

break;

833

case BARATINOO_TRACE_INFO:

834

prefix = "INFO";

835

break;

836

case BARATINOO_TRACE_DEBUG:

837

prefix = "DEBUG";

838

break;

839

}

840

841

if (engine_num == 0)

842

fprintf(stderr, "Baratinoo library: ");

843

else

844

fprintf(stderr, "Baratinoo engine #%d: ", engine_num);

845

fprintf(stderr, "%s: %s ", prefix, source);

846

vfprintf(stderr, format, args);

847

fprintf(stderr, "\n");

848

}

849

850

/**

851

* @brief Output (sound) callback

852

* @param private_data An Engine structure.

853

* @param address Audio samples.

854

* @param length Length of @p address, in bytes.

855

* @returns Whether to break out of the process loop.

856

*

857

* Called by the engine during speech synthesis.

858

*

859

* @see BCprocessLoop()

860

*/

861

static int baratinoo_output_signal(void *private_data, const void *address, int length)

862

{

863

Engine *engine = private_data;

864

AudioTrack track;

865

#if defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN)

866

AudioFormat format = SPD_AUDIO_BE;

867

#else

868

AudioFormat format = SPD_AUDIO_LE;

869

#endif

870

871

/* If stop is requested during synthesis, abort here to stop speech as

872

* early as possible, even if the engine didn't finish its cycle yet. */

873

if (engine->stop_requested) {

874

DBG(DBG_MODNAME "Not playing message because it got stopped");

875

return engine->stop_requested;

876

}

877

878

/* We receive 16 bits PCM data */

879

track.num_samples = length / 2; /* 16 bits per sample = 2 bytes */

880

track.num_channels = 1;

881

track.sample_rate = BaratinooSampleRate;

882

track.bits = 16;

883

track.samples = (short *) address;

884

885

DBG(DBG_MODNAME "Playing part of the message");

886

if (module_tts_output(track, format) < 0)

887

DBG(DBG_MODNAME "ERROR: failed to play the track");

888

889

return engine->stop_requested;

890

}

891

892

/* SSML conversion functions */

893

894

typedef struct {

895

const Engine *engine;

896

GString *buffer;

897

/* Voice ID stack for the current element */

898

int voice_stack[32];

899

unsigned int voice_stack_len;

900

} SsmlPraserState;

901

902

/* Adds a language change command for @p lang if appropriate */

903

static void ssml2baratinoo_push_lang(SsmlPraserState *state, const char *lang)

904

{

905

int voice;

906

907

if (state->voice_stack_len > 0)

908

voice = state->voice_stack[state->voice_stack_len - 1];

909

else

910

voice = state->engine->voice;

911

912

if (lang) {

913

DBG(DBG_MODNAME "Processing xml:lang=\"%s\"", lang);

914

int new_voice = baratinoo_find_voice(&baratinoo_engine, lang,

915

msg_settings.voice_type);

916

if (new_voice >= 0 && new_voice != voice) {

917

g_string_append_printf(state->buffer, "\\vox{%d}", new_voice);

918

voice = new_voice;

919

}

920

}

921

922

if (state->voice_stack_len >= G_N_ELEMENTS(state->voice_stack)) {

923

DBG(DBG_MODNAME "WARNING: voice stack exhausted, expect incorrect voices.");

924

} else {

925

state->voice_stack[state->voice_stack_len++] = voice;

926

}

927

}

928

929

/* Pops a language pushed with @c ssml2baratinoo_push_lang() */

930

static void ssml2baratinoo_pop_lang(SsmlPraserState *state)

931

{

932

if (state->voice_stack_len > 0) {

933

int cur_voice = state->voice_stack[--state->voice_stack_len];

934

935

if (state->voice_stack_len > 0) {

936

int new_voice = state->voice_stack[state->voice_stack_len - 1];

937

938

if (new_voice != cur_voice)

939

g_string_append_printf(state->buffer, "\\vox{%d}", new_voice);

940

}

941

}

942

}

943

944

/* Locates a string in a NULL-terminated array of strings.
 * Returns the index of the first entry equal to @p name, or -1 if there is
 * none, or if @p names or @p name is NULL. */
static int attribute_index(const char **names, const char *name)
{
	int i;

	/* comparing against a NULL string would be undefined behavior */
	if (names == NULL || name == NULL)
		return -1;

	for (i = 0; names[i] != NULL; i++) {
		if (strcmp(names[i], name) == 0)
			return i;
	}

	return -1;
}

957

958

/* Markup element start callback */

959

static void ssml2baratinoo_start_element(GMarkupParseContext *ctx,

960

const gchar *element,

961

const gchar **attribute_names,

962

const gchar **attribute_values,

963

gpointer data, GError **error)

964

{

965

SsmlPraserState *state = data;

966

int lang_id;

967

968

/* handle voice changes */

969

lang_id = attribute_index(attribute_names, "xml:lang");

970

ssml2baratinoo_push_lang(state, lang_id < 0 ? NULL : attribute_values[lang_id]);

971

972

/* handle elements */

973

if (strcmp(element, "mark") == 0) {

974

int i = attribute_index(attribute_names, "name");

975

g_string_append_printf(state->buffer, "\\mark{%s}",

976

i < 0 ? "" : attribute_values[i]);

977

} else if (strcmp(element, "emphasis") == 0) {

978

int i = attribute_index(attribute_names, "level");

979

g_string_append_printf(state->buffer, "\\emph<{%s}",

980

i < 0 ? "" : attribute_values[i]);

981

} else {

982

/* ignore other elements */

983

/* TODO: handle more elements */

984

}

985

}

986

987

/* Markup element end callback */

988

static void ssml2baratinoo_end_element(GMarkupParseContext *ctx,

989

const gchar *element,

990

gpointer data, GError **error)

991

{

992

SsmlPraserState *state = data;

993

994

if (strcmp(element, "emphasis") == 0) {

995

g_string_append(state->buffer, "\\emph>{}");

996

}

997

998

ssml2baratinoo_pop_lang(state);

999

}

1000

1001

/* Markup text node callback.

1002

*

1003

* This not only converts to the proprietary format (by escaping things that

1004

* would be interpreted by it), but also pre-processes the text for some

1005

* features that are missing from Baratinoo.

1006

*

1007

* - Punctuation speaking

1008

*

1009

* As the engine doesn't support speaking of the punctuation itself, we alter

1010

* the input to explicitly tell the engine to do it. It is kind of tricky,

1011

* because we want to keep the punctuation meaning of the characters, e.g. how

1012

* they affect speech as means of intonation and pauses.

1013

*

1014

* The approach here is that for every punctuation character included in the

1015

* selected mode (none/some/all), we wrap it in "\sayas<{characters}" markup

1016

* so that it is spoken by the engine. But in order to keep the punctuation

1017

* meaning of the character, in case it has some, we duplicate it outside the

1018

* markup with a heuristic on whether it will or not affect speech intonation

1019

* and pauses, and whether or not the engine would speak the character itself

1020

* already (as we definitely don't want to get duplicated speech for a

1021

* character).

1022

* This heuristic is as follows:

1023

* - If the character is listed in BaratinooIntonationList and the next

1024

* character is not punctuation or alphanumeric, duplicate the character.

1025

* - Always append a space after a duplicated character, hoping the engine

1026

* won't consider speaking it.

1027

*

1028

* This won't always give the same results as the engine would by itself, but

1029

* it isn't really possible as the engine behavior is language-dependent in a

1030

* non-obvious fashion. For example, a French voice will speak "1.2.3" as

1031

* "Un. Deux. Trois", while an English one will speak it as "One dot two dot

1032

* three": the dot here didn't have the same interpretation, and wasn't spoken

1033

* the same (once altering the voice, the other spoken plain and simple).

1034

*

1035

* However, the heuristic here should be highly unlikely to lead to duplicate

1036

* character speaking, and catch most of the intonation and pause cases.

1037

*

1038

* - Why is this done that way?

1039

*

1040

* Another, possibly more robust, approach could be using 2 passes in the

1041

* engine itself, and relying on events to get information on how the engine

1042

* interprets the input in the first (silent) pass, and alter it as needed for

1043

* a second (spoken) pass. This wouldn't guarantee the altered input would be

1044

* interpreted the same, but it would seem like a safe enough bet.

1045

*

1046

* However, the engine is too slow for this to be viable in a real-time

1047

* processing environment for anything but tiny input. Even about 25 lines of

1048

* IRC conversation can easily take several seconds to process in the first

1049

* pass (even without doing any actual pre-processing on our end), delaying

1050

* the actual speech by an unacceptable amount of time.

1051

*

1052

* Ideally, the engine will some day support speaking punctuation itself, and

1053

* this part of the pre-processing could be dropped.

1054

*/

1055

static void ssml2baratinoo_text(GMarkupParseContext *ctx,

1056

const gchar *text, gsize len,

1057

gpointer data, GError **error)

1058

{

1059

SsmlPraserState *state = data;

1060

const gchar *p;

1061

1062

for (p = text; p < (text + len); p = g_utf8_next_char(p)) {

1063

if (*p == '\\') {

1064

/* escape the \ by appending a comment so it won't be

1065

* interpreted as a command */

1066

g_string_append(state->buffer, "\\\\{}");

1067

} else {

1068

gboolean say_as_char;

1069

gunichar ch = g_utf8_get_char(p);

1070

1071

/* if punctuation mode is not NONE and the character

1072

* should be spoken, manually wrap it with \sayas */

1073

say_as_char = ((msg_settings.punctuation_mode == SPD_PUNCT_SOME &&

1074

g_utf8_strchr(BaratinooPunctuationList, -1, ch)) ||

1075

(msg_settings.punctuation_mode == SPD_PUNCT_ALL &&

1076

g_unichar_ispunct(ch)));

1077

1078

if (say_as_char)

1079

g_string_append(state->buffer, "\\sayas<{characters}");

1080

g_string_append_unichar(state->buffer, ch);

1081

if (say_as_char) {

1082

g_string_append(state->buffer, "\\sayas>{}");

1083

1084

/* if the character should influence intonation,

1085

* add it back, but *only* if it wouldn't be spoken */

1086

if (g_utf8_strchr(BaratinooIntonationList, -1, ch)) {

1087

const gchar *next = g_utf8_next_char(p);

1088

gunichar ch_next;

1089

1090

if (next < text + len)

1091

ch_next = g_utf8_get_char(next);

1092

else

1093

ch_next = '\n';

1094

1095

if (!g_unichar_isalnum(ch_next) &&

1096

!g_unichar_ispunct(ch_next)) {

1097

g_string_append_unichar(state->buffer, ch);

1098

/* Append an extra space to try and

1099

* make sure it's considered as

1100

* punctuation and isn't spoken. */

1101

g_string_append_c(state->buffer, ' ');

1102

}

1103

}

1104

}

1105

}

1106

}

1107

}

1108

1109

/**

1110

* @brief Converts SSML data to Baratinoo's proprietary format.

1111

* @param buf A buffer to write to.

1112

* @param data SSML data to convert.

1113

* @param size Length of @p data

1114

*

1115

* @warning Only a subset of the input SSML is currently translated, the rest

1116

* being discarded.

1117

*/

1118

static void append_ssml_as_proprietary(const Engine *engine, GString *buf, const char *data, gsize size)

1119

{

1120

/* FIXME: we could possibly use SSML mode, but the Baratinoo parser is

1121

* very strict and *requires* "xmlns", "version" and "lang" attributes

1122

* on the <speak> tag, which speech-dispatcher doesn't provide.

1123

*

1124

* Moreover, we need to add tags for volume/rate/pitch so we'd have to

1125

* amend the data anyway. */

1126

static const GMarkupParser parser = {

1127

.start_element = ssml2baratinoo_start_element,

1128

.end_element = ssml2baratinoo_end_element,

1129

.text = ssml2baratinoo_text,

1130

};

1131

SsmlPraserState state = {

1132

.engine = engine,

1133

.buffer = buf,

1134

.voice_stack_len = 0,

1135

};

1136

GMarkupParseContext *ctx;

1137

GError *err = NULL;

1138

1139

ctx = g_markup_parse_context_new(&parser, G_MARKUP_TREAT_CDATA_AS_TEXT,

1140

&state, NULL);

1141

if (!g_markup_parse_context_parse(ctx, data, size, &err) ||

1142

!g_markup_parse_context_end_parse(ctx, &err)) {

1143

DBG(DBG_MODNAME "Failed to convert SSML: %s", err->message);

1144

g_error_free(err);

1145

}

1146

1147

g_markup_parse_context_free(ctx);

1148

}