BackgroundAudio 1.3.3
Loading...
Searching...
No Matches
BackgroundAudioSpeech.h
1/*
2 BackgroundAudio
3 Plays an audio file using IRQ driven decompression. Main loop() writes
4 data to the buffer but isn't blocked while playing
5
6 Copyright (c) 2024 Earle F. Philhower, III <earlephilhower@yahoo.com>
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#pragma once
23#include <Arduino.h>
24#include "WrappedAudioOutputBase.h"
25#include "BackgroundAudioGain.h"
26#include "BackgroundAudioBuffers.h"
27#include "libespeak-ng/espeak-ng/speak_lib.h"
28#include "libespeak-ng/phoneme/phonindex.h"
29#include "libespeak-ng/phoneme/phontab.h"
30#include "libespeak-ng/phoneme/phondata.h"
31#include "libespeak-ng/phoneme/intonations.h"
32
33// These will be defined by including a language
34extern const unsigned char __espeakng_dict[];
35extern size_t __espeakng_dictlen;
36
40typedef struct {
42 const char *name;
44 size_t len; // Size of binary data
46 const unsigned char *data;
48
49
55template<class DataBuffer>
57public:
59 _playing = false;
60 _out = nullptr;
61 _paused = false;
62 _voice = nullptr;
63 }
64
70 BackgroundAudioSpeechClass(AudioOutputBase &d) {
71 _playing = false;
72 _paused = false;
73 setDevice(&d);
74 }
75
77
85 bool setDevice(AudioOutputBase *d) {
86 if (!_playing) {
87 _out = d;
88 return true;
89 }
90 return false;
91 }
92
98 void setGain(float scale) {
99 _gain = (int32_t)(scale * (1 << 16));
100 }
101
108 _voice = v.data;
109 _voiceLen = v.len;
110 }
111
117 void setRate(int rate) {
118 espeak_SetParameter(espeakRATE, rate, 0);
119 }
120
126 void setPitch(int pitch) {
127 espeak_SetParameter(espeakPITCH, pitch, 0);
128 }
129
135 void setWordGap(int gap) {
136 espeak_SetParameter(espeakWORDGAP, gap, 0);
137 }
138
144 bool begin() {
145 if (_playing || !_voice || !_voiceLen) {
146 return false;
147 }
148
149 espeak_EnableSingleStep();
150 espeak_InstallDict(__espeakng_dict, __espeakng_dictlen);
151 espeak_InstallPhonIndex(_phonindex, sizeof(_phonindex));
152 espeak_InstallPhonTab(_phontab, sizeof(_phontab));
153 espeak_InstallPhonData(_phondata, sizeof(_phondata));
154 espeak_InstallIntonations(_intonations, sizeof(_intonations));
155 espeak_InstallVoice(_voice, _voiceLen);
156
157 int samplerate = espeak_Initialize(AUDIO_OUTPUT_SYNCH_PLAYBACK, 20, nullptr, 0);
158 espeak_SetVoiceByFile("INTERNAL");
159 espeak_SetSynthCallback(_speechCB);
160
161 // We will use natural frame size to minimize mismatch
162 _out->setBuffers(5, framelen);
163 _out->onTransmit(&_cb, (void *)this); // The pump we will use to generate our audio
164 _out->setBitsPerSample(16);
165 _out->setStereo(true);
166 _out->setFrequency(samplerate);
167 _out->begin();
168
169 // Stuff with silence to start
170 uint16_t zeros[32] __attribute__((aligned(4))) = {};
171 while (_out->availableForWrite() > 32) {
172 _out->write((uint8_t *)zeros, sizeof(zeros));
173 }
174
175 _playing = true;
176
177 return true;
178 }
179
183 void end() {
184 if (_playing) {
185 _out->end();
186 }
187 _playing = false;
188 }
189
195 bool playing() {
196 return _playing;
197 }
198
217 size_t write(const void *data, size_t len) {
218 return _ib.write((const uint8_t *)data, len);
219 }
220
227 size_t speak(const char *string) {
228 if (availableForWrite() < strlen(string)) {
229 return 0; // All or nothing
230 }
231 return write((const void *)string, strlen(string) + 1);
232 }
233
240 size_t speak(const String &string) {
241 return speak(string.c_str());
242 }
243
244
257 return _ib.availableForWrite();
258 }
259
265 size_t available() {
266 return _ib.available();
267 }
268
274 bool done() {
275 return !available() && !_generatingSpeech;
276 }
277
283 uint32_t frames() {
284 return _frames;
285 }
286
292 uint32_t shifts() {
293 return _shifts;
294 }
295
301 uint32_t underflows() {
302 return _underflows;
303 }
304
310 uint32_t errors() {
311 return _errors;
312 }
313
319 uint32_t dumps() {
320 return _dumps;
321 }
322
326 void pause() {
327 _paused = true;
328 }
329
335 bool paused() {
336 return _paused;
337 }
338
339
343 void unpause() {
344 _paused = false;
345 }
346
354 void flush() {
355 noInterrupts();
356 _ib.flush();
357 _generatingSpeech = false;
358 short *mono;
359 espeak_SynthesizeOneStep(&mono); // Thrown out
360 espeak_AbortSynthesis();
361 interrupts();
362 }
363
364private:
365 static void _cb(void *ptr) {
366 ((BackgroundAudioSpeechClass*)ptr)->pump();
367 }
368
369 static int _speechCB(short *data, int count, espeak_EVENT *events) {
370 return 0; // Should never really be called by ESpeak internals
371 }
372
373 void generateOneFrame() {
374 _frameLen = 0;
375
376 // If we're not currently synthesizng speech, is there another string we can say?
377 if (!_generatingSpeech) {
378 if (_ib.available()) {
379 const uint8_t *b = _ib.buffer();
380 for (int i = 0; i < (int)_ib.available(); i++) {
381 if (!b[i]) {
382 espeak_Synth(_ib.buffer(), i, 0, (espeak_POSITION_TYPE)0, 0, espeakCHARS_AUTO, 0, this);
383 _generatingSpeech = true;
384 break;
385 }
386 }
387 }
388 }
389
390 if (_generatingSpeech && !_frameLen) {
391 // Generate the raw samples
392 short *mono;
393 _frameLen = std::min(espeak_SynthesizeOneStep(&mono), framelen);
394 // Now convert to stereo by duplicating channels, store in frame buffer
395 int16_t *ptr = _frame;
396 for (int i = 0; i < _frameLen; i++) {
397 *ptr++ = *mono;
398 *ptr++ = *mono++;
399 }
400 // Amplify if requested
401 ApplyGain(_frame, _frameLen * 2, _gain);
402 // Advance synthesis state and check if done
403 if (!espeak_SynthesisGenerateNext()) {
404 _generatingSpeech = false;
405 _ib.shiftUp(strlen((const char *)_ib.buffer()) + 1); // Only shift out the speech once it's done speaking, easier to track
406 _shifts++;
407 }
408 }
409 }
410
411 void pump() {
412 while (_out->availableForWrite() >= (int)framelen) {
413 if (!_frameLen && !_paused) {
414 generateOneFrame();
415 }
416 if (_paused || !_frameLen) {
417 bzero(_frame, sizeof(_frame));
418 _out->write((uint8_t *)_frame, sizeof(_frame));
419 } else {
420 _frameLen -= _out->write((uint8_t *)_frame, _frameLen * 4) / 4;
421 }
422 }
423 }
424
425private:
426 AudioOutputBase *_out;
427 bool _playing = false;
428 bool _paused = false;
429 DataBuffer _ib;
430 int32_t _gain = 1 << 16;
431 bool _generatingSpeech = false;
432 static constexpr int framelen = 1324; // From the 22050 normal samplerate and 20 length
433 int16_t _frame[framelen * 2]; // Overprovision in case we get a long speech frame
434 int _frameLen = 0;
435
436 const unsigned char *_dict;
437 size_t _dictLen;
438 const unsigned char *_voice;
439 size_t _voiceLen;
440
441 // Quality stats, cumulative
442 uint32_t _frames = 0;
443 uint32_t _shifts = 0;
444 uint32_t _underflows = 0;
445 uint32_t _errors = 0;
446 uint32_t _dumps = 0;
447};
448
449
Interrupt-driven ESpeak-NG instance. Generates a full frame of samples each cycle and uses the RawBuf...
Definition BackgroundAudioSpeech.h:56
bool done()
Determine if no more speech is present in the buffer.
Definition BackgroundAudioSpeech.h:274
void setWordGap(int gap)
Adjust the interword gap after begin()
Definition BackgroundAudioSpeech.h:135
uint32_t underflows()
Get the number of times the speaker has underflowed waiting on raw data since begin
Definition BackgroundAudioSpeech.h:301
bool playing()
Determines if the speaker has been started.
Definition BackgroundAudioSpeech.h:195
size_t speak(const char *string)
Speaks a C-String.
Definition BackgroundAudioSpeech.h:227
BackgroundAudioSpeechClass(AudioOutputBase &d)
Construct an output device using the specified physical audio output.
Definition BackgroundAudioSpeech.h:70
bool setDevice(AudioOutputBase *d)
Set an output device before begin
Definition BackgroundAudioSpeech.h:85
bool paused()
Determine if the playback is paused.
Definition BackgroundAudioSpeech.h:335
uint32_t shifts()
Get the number of input data shifts processed by decoder since begin
Definition BackgroundAudioSpeech.h:292
void setPitch(int pitch)
Adjust the pitch, 0...99, with 50 default. After begin()
Definition BackgroundAudioSpeech.h:126
void pause()
Pause the decoder. Won't process raw input data and will transmit silence.
Definition BackgroundAudioSpeech.h:326
uint32_t frames()
Get number of "frames" processed by speaker.
Definition BackgroundAudioSpeech.h:283
uint32_t dumps()
Get the number of full buffer dumps (catastrophic data error) since begin
Definition BackgroundAudioSpeech.h:319
size_t availableForWrite()
Gets number of bytes available to write to raw buffer.
Definition BackgroundAudioSpeech.h:256
void unpause()
Unpause previously paused playback. Will start processing input data again.
Definition BackgroundAudioSpeech.h:343
bool begin()
Starts the background speaker. Will initialize the output device and start sending silence immediatel...
Definition BackgroundAudioSpeech.h:144
size_t available()
Gets number of bytes already in the raw buffer.
Definition BackgroundAudioSpeech.h:265
size_t speak(const String &string)
Speaks an Arduino String.
Definition BackgroundAudioSpeech.h:240
void setRate(int rate)
Set the speaking rate in ~wpm, after calling begin()
Definition BackgroundAudioSpeech.h:117
void end()
Stops the process and then calls the output device's end to shut it down, too.
Definition BackgroundAudioSpeech.h:183
size_t write(const void *data, size_t len)
Writes a block of raw data to the decoder's buffer.
Definition BackgroundAudioSpeech.h:217
void setVoice(BackgroundAudioVoice &v)
Sets the voice parameters (language customization)
Definition BackgroundAudioSpeech.h:107
void flush()
Flushes any existing raw data, resets the processor to start a new speaking.
Definition BackgroundAudioSpeech.h:354
void setGain(float scale)
Set the gain multiplier (volume) for the stream. Takes effect immediately.
Definition BackgroundAudioSpeech.h:98
uint32_t errors()
Get the number of decoder errors since begin
Definition BackgroundAudioSpeech.h:310
Structure to collect an ESpeak-NG voice with its human-readable name.
Definition BackgroundAudioSpeech.h:40
const char * name
Definition BackgroundAudioSpeech.h:42
const unsigned char * data
Definition BackgroundAudioSpeech.h:46
size_t len
Definition BackgroundAudioSpeech.h:44