BackgroundAudio 1.4.4
Loading...
Searching...
No Matches
BackgroundAudioSpeech.h
1/*
2 BackgroundAudio
3 Plays an audio file using IRQ driven decompression. Main loop() writes
4 data to the buffer but isn't blocked while playing
5
6 Copyright (c) 2024 Earle F. Philhower, III <earlephilhower@yahoo.com>
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20*/
21
22#pragma once
23#include <Arduino.h>
24#include "WrappedAudioOutputBase.h"
25#include "BackgroundAudioGain.h"
26#include "BackgroundAudioBuffers.h"
27#include "libespeak-ng/espeak-ng/speak_lib.h"
28#include "libespeak-ng/phoneme/phonindex.h"
29#include "libespeak-ng/phoneme/phontab.h"
30#include "libespeak-ng/phoneme/phondata.h"
31#include "libespeak-ng/phoneme/intonations.h"
32
33// These will be defined by including a language
34extern const unsigned char __espeakng_dict[];
35extern size_t __espeakng_dictlen;
36
40typedef struct {
42 const char *name;
44 size_t len; // Size of binary data
46 const unsigned char *data;
48
49
55template<class DataBuffer>
57public:
59 _playing = false;
60 _out = nullptr;
61 _paused = false;
62 _voice = nullptr;
63 }
64
70 BackgroundAudioSpeechClass(AudioOutputBase &d) {
71 _playing = false;
72 _paused = false;
73 setDevice(&d);
74 }
75
77
85 bool setDevice(AudioOutputBase *d) {
86 if (!_playing) {
87 _out = d;
88 return true;
89 }
90 return false;
91 }
92
98 void setGain(float scale) {
99 _gain = (int32_t)(scale * (1 << 16));
100 }
101
108 _voice = v.data;
109 _voiceLen = v.len;
110 }
111
117 void setRate(int rate) {
118 espeak_SetParameter(espeakRATE, rate, 0);
119 }
120
126 void setPitch(int pitch) {
127 espeak_SetParameter(espeakPITCH, pitch, 0);
128 }
129
135 void setWordGap(int gap) {
136 espeak_SetParameter(espeakWORDGAP, gap, 0);
137 }
138
144 bool begin() {
145 if (_playing || !_voice || !_voiceLen || !_out) {
146 return false;
147 }
148
149#ifdef ARDUINO_ARCH_RP2040
150 _workIRQ = user_irq_claim_unused(true);
151 _workObj = this;
152 irq_set_exclusive_handler(_workIRQ, _irqStub);
153 irq_set_priority(_workIRQ, 0xc0); // Lowest prio
154 irq_set_enabled(_workIRQ, true);
155#endif
156
157 espeak_EnableSingleStep();
158 espeak_InstallDict(__espeakng_dict, __espeakng_dictlen);
159 espeak_InstallPhonIndex(_phonindex, sizeof(_phonindex));
160 espeak_InstallPhonTab(_phontab, sizeof(_phontab));
161 espeak_InstallPhonData(_phondata, sizeof(_phondata));
162 espeak_InstallIntonations(_intonations, sizeof(_intonations));
163 espeak_InstallVoice(_voice, _voiceLen);
164
165 int samplerate = espeak_Initialize(AUDIO_OUTPUT_SYNCH_PLAYBACK, 20, nullptr, 0);
166 espeak_SetVoiceByFile("INTERNAL");
167 espeak_SetSynthCallback(_speechCB);
168
169 // We will use natural frame size to minimize mismatch
170 _out->setBuffers(5, framelen);
171 _out->onTransmit(&_cb, (void *)this); // The pump we will use to generate our audio
172 _out->setBitsPerSample(16);
173 _out->setStereo(true);
174 _out->setFrequency(samplerate);
175 _out->begin();
176
177 // Stuff with silence to start
178 uint16_t zeros[32] __attribute__((aligned(4))) = {};
179 while (_out->availableForWrite() > 32 * 2) {
180 _out->write((uint8_t *)zeros, sizeof(zeros));
181 }
182
183 _playing = true;
184
185 return true;
186 }
187
191 void end() {
192 if (_playing) {
193#ifdef ARDUINO_ARCH_RP2040
194 irq_set_enabled(_workIRQ, false);
195 user_irq_unclaim(_workIRQ);
196#endif
197 _out->end();
198 }
199 _playing = false;
200 }
201
207 bool playing() {
208 return _playing;
209 }
210
229 size_t write(const void *data, size_t len) {
230 return _ib.write((const uint8_t *)data, len);
231 }
232
239 size_t speak(const char *string) {
240 if (availableForWrite() < strlen(string)) {
241 return 0; // All or nothing
242 }
243 return write((const void *)string, strlen(string) + 1);
244 }
245
252 size_t speak(const String &string) {
253 return speak(string.c_str());
254 }
255
256
269 return _ib.availableForWrite();
270 }
271
277 size_t available() {
278 return _ib.available();
279 }
280
286 bool done() {
287 return !available() && !_generatingSpeech;
288 }
289
295 uint32_t frames() {
296 return _frames;
297 }
298
304 uint32_t shifts() {
305 return _shifts;
306 }
307
313 uint32_t underflows() {
314 return _underflows;
315 }
316
322 uint32_t errors() {
323 return _errors;
324 }
325
331 uint32_t dumps() {
332 return _dumps;
333 }
334
338 void pause() {
339 _paused = true;
340 }
341
347 bool paused() {
348 return _paused;
349 }
350
351
355 void unpause() {
356 _paused = false;
357 }
358
366 void flush() {
367 noInterrupts();
368 _ib.flush();
369 _generatingSpeech = false;
370 short *mono;
371 espeak_SynthesizeOneStep(&mono); // Thrown out
372 espeak_AbortSynthesis();
373 interrupts();
374 }
375
376private:
377#ifdef ARDUINO_ARCH_RP2040
378 static void _irqStub() {
380 }
381
382 static void _cb(void *ptr) {
383 // Don't actually do work in the DMA interrupt, do it in the work IRQ context (low prio)
385 }
386#else
387 static void _cb(void *ptr) {
388 ((BackgroundAudioSpeechClass*)ptr)->pump();
389 }
390#endif
391
392 static int _speechCB(short *data, int count, espeak_EVENT *events) {
393 return 0; // Should never really be called by ESpeak internals
394 }
395
396 void generateOneFrame() {
397 _frameLen = 0;
398
399 // If we're not currently synthesizng speech, is there another string we can say?
400 if (!_generatingSpeech) {
401 if (_ib.available()) {
402 const uint8_t *b = _ib.buffer();
403 for (int i = 0; i < (int)_ib.available(); i++) {
404 if (!b[i]) {
405 espeak_Synth(_ib.buffer(), i, 0, (espeak_POSITION_TYPE)0, 0, espeakCHARS_AUTO, 0, this);
406 _generatingSpeech = true;
407 break;
408 }
409 }
410 }
411 }
412
413 if (_generatingSpeech && !_frameLen) {
414 // Generate the raw samples
415 short *mono;
416 _frameLen = std::min(espeak_SynthesizeOneStep(&mono), framelen);
417 // Now convert to stereo by duplicating channels, store in frame buffer
418 int16_t *ptr = _frame;
419 for (int i = 0; i < _frameLen; i++) {
420 *ptr++ = *mono;
421 *ptr++ = *mono++;
422 }
423 // Amplify if requested
424 ApplyGain(_frame, _frameLen * 2, _gain);
425 // Advance synthesis state and check if done
426 if (!espeak_SynthesisGenerateNext()) {
427 _generatingSpeech = false;
428 _ib.shiftUp(strlen((const char *)_ib.buffer()) + 1); // Only shift out the speech once it's done speaking, easier to track
429 _shifts++;
430 }
431 }
432 }
433
434#ifdef ARDUINO_ARCH_RP2040
435public:
436#endif
437 void pump() {
438 while (_out->availableForWrite() >= (int)(framelen * 4)) {
439 if (!_frameLen && !_paused) {
440 generateOneFrame();
441 }
442 if (_paused || !_frameLen) {
443 bzero(_frame, sizeof(_frame));
444 assert(_out->write((uint8_t *)_frame, sizeof(_frame)) == sizeof(_frame));
445 } else {
446 assert(_out->write((uint8_t *)_frame, _frameLen * 4) == (size_t)(_frameLen * 4));
447 _frameLen = 0;
448 }
449 }
450#ifdef ARDUINO_ARCH_RP2040
451 irq_clear(_workIRQ);
452#endif
453 }
454
455#ifdef ARDUINO_ARCH_RP2040
456 static uint8_t _workIRQ;
458#endif
459
460private:
461 AudioOutputBase *_out;
462 bool _playing = false;
463 bool _paused = false;
464 DataBuffer _ib;
465 int32_t _gain = 1 << 16;
466 bool _generatingSpeech = false;
467 static constexpr int framelen = 1324; // From the 22050 normal samplerate and 20 length
468 int16_t _frame[framelen * 2]; // Overprovision in case we get a long speech frame
469 int _frameLen = 0;
470
471 const unsigned char *_dict;
472 size_t _dictLen;
473 const unsigned char *_voice;
474 size_t _voiceLen;
475
476 // Quality stats, cumulative
477 uint32_t _frames = 0;
478 uint32_t _shifts = 0;
479 uint32_t _underflows = 0;
480 uint32_t _errors = 0;
481 uint32_t _dumps = 0;
482};
483
484#ifdef ARDUINO_ARCH_RP2040
485template<class DataBuffer> uint8_t BackgroundAudioSpeechClass<DataBuffer>::_workIRQ;
487#endif
488
Interrupt-driven ESpeak-NG instance. Generates a full frame of samples each cycle and uses the RawBuf...
Definition BackgroundAudioSpeech.h:56
bool done()
Determine if no more speech is present in the buffer.
Definition BackgroundAudioSpeech.h:286
void setWordGap(int gap)
Adjust the interword gap after begin()
Definition BackgroundAudioSpeech.h:135
uint32_t underflows()
Get the number of times the speaker has underflowed waiting on raw data since begin
Definition BackgroundAudioSpeech.h:313
bool playing()
Determines if the speaker has been started.
Definition BackgroundAudioSpeech.h:207
size_t speak(const char *string)
Speaks a C-String.
Definition BackgroundAudioSpeech.h:239
BackgroundAudioSpeechClass(AudioOutputBase &d)
Construct an output device using the specified physical audio output.
Definition BackgroundAudioSpeech.h:70
bool setDevice(AudioOutputBase *d)
Set an output device before begin
Definition BackgroundAudioSpeech.h:85
bool paused()
Determine if the playback is paused.
Definition BackgroundAudioSpeech.h:347
uint32_t shifts()
Get the number of input data shifts processed by decoder since begin
Definition BackgroundAudioSpeech.h:304
void setPitch(int pitch)
Adjust the pitch, 0...99, with 50 default. After begin()
Definition BackgroundAudioSpeech.h:126
void pause()
Pause the decoder. Won't process raw input data and will transmit silence.
Definition BackgroundAudioSpeech.h:338
uint32_t frames()
Get number of "frames" processed by speaker.
Definition BackgroundAudioSpeech.h:295
uint32_t dumps()
Get the number of full buffer dumps (catastrophic data error) since begin
Definition BackgroundAudioSpeech.h:331
size_t availableForWrite()
Gets number of bytes available to write to raw buffer.
Definition BackgroundAudioSpeech.h:268
void unpause()
Unpause previously paused playback. Will start processing input data again.
Definition BackgroundAudioSpeech.h:355
bool begin()
Starts the background speaker. Will initialize the output device and start sending silence immediatel...
Definition BackgroundAudioSpeech.h:144
size_t available()
Gets number of bytes already in the raw buffer.
Definition BackgroundAudioSpeech.h:277
size_t speak(const String &string)
Speaks an Arduino String.
Definition BackgroundAudioSpeech.h:252
void setRate(int rate)
Set the speaking rate in ~wpm, after calling begin()
Definition BackgroundAudioSpeech.h:117
void end()
Stops the process and then calls the output device's end() to shut it down, too.
Definition BackgroundAudioSpeech.h:191
size_t write(const void *data, size_t len)
Writes a block of raw data to the decoder's buffer.
Definition BackgroundAudioSpeech.h:229
void setVoice(BackgroundAudioVoice &v)
Sets the voice parameters (language customization)
Definition BackgroundAudioSpeech.h:107
void flush()
Flushes any existing raw data and resets the processor so a new utterance can be started.
Definition BackgroundAudioSpeech.h:366
void setGain(float scale)
Set the gain multiplier (volume) for the stream. Takes effect immediately.
Definition BackgroundAudioSpeech.h:98
uint32_t errors()
Get the number of decoder errors since begin
Definition BackgroundAudioSpeech.h:322
Structure to collect an ESpeak-NG voice with its human-readable name.
Definition BackgroundAudioSpeech.h:40
const char * name
Definition BackgroundAudioSpeech.h:42
const unsigned char * data
Definition BackgroundAudioSpeech.h:46
size_t len
Definition BackgroundAudioSpeech.h:44