From 87bfb7e868deac20a954f3c1c5880f9b70e5ab85 Mon Sep 17 00:00:00 2001 From: HenTaiCJN <1197845552@qq.com> Date: Sat, 15 Feb 2025 21:06:19 +0800 Subject: [PATCH 1/2] =?UTF-8?q?tts=E7=9A=84c=E7=BC=96=E5=86=99=E4=B8=8EMic?= =?UTF-8?q?ropython=20C=E6=8B=93=E5=B1=95=E6=96=87=E4=BB=B6=E7=BC=96?= =?UTF-8?q?=E5=86=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../labplus_for_xuejing/mpconfigboard.cmake | 5 + .../labplus_for_xuejing/partitions-8MiB.csv | 15 +- port/builtins/modtest.c | 39 ++++ port/builtins/modtest.c.bk | 44 +++++ port/drivers/esp_tts_wav/wav_decoder.c | 179 ++++++++++++++++++ port/drivers/esp_tts_wav/wav_decoder.h | 39 ++++ port/drivers/esp_tts_wav/wav_encoder.c | 111 +++++++++++ port/drivers/esp_tts_wav/wav_encoder.h | 35 ++++ port/drivers/tts/tts_pcm.c | 90 +++++++++ port/drivers/tts/tts_pcm.c.bk | 116 ++++++++++++ port/drivers/tts/tts_pcm.h | 9 + port/drivers/tts/tts_pcm.h.bk | 9 + port/makeimg.py | 3 +- 13 files changed, 685 insertions(+), 9 deletions(-) create mode 100644 port/builtins/modtest.c create mode 100644 port/builtins/modtest.c.bk create mode 100644 port/drivers/esp_tts_wav/wav_decoder.c create mode 100644 port/drivers/esp_tts_wav/wav_decoder.h create mode 100644 port/drivers/esp_tts_wav/wav_encoder.c create mode 100644 port/drivers/esp_tts_wav/wav_encoder.h create mode 100644 port/drivers/tts/tts_pcm.c create mode 100644 port/drivers/tts/tts_pcm.c.bk create mode 100644 port/drivers/tts/tts_pcm.h create mode 100644 port/drivers/tts/tts_pcm.h.bk diff --git a/port/boards/labplus_for_xuejing/mpconfigboard.cmake b/port/boards/labplus_for_xuejing/mpconfigboard.cmake index a05e46c9..4984bd5b 100644 --- a/port/boards/labplus_for_xuejing/mpconfigboard.cmake +++ b/port/boards/labplus_for_xuejing/mpconfigboard.cmake @@ -42,6 +42,8 @@ set(MICROPY_SOURCE_BOARD ${MPY_PORT_DIR}/drivers/startup/oled.c ${MPY_PORT_DIR}/drivers/startup/startup.c ${MPY_PORT_DIR}/drivers/rfid/mfrc522.c + ${MPY_PORT_DIR}/drivers/tts/tts_pcm.c + ${MPY_PORT_DIR}/drivers/esp_tts_wav/wav_encoder.c ${MPY_PORT_DIR}/lib/utils/pyexec.c ${MPY_PORT_DIR}/builtins/modmusictunes.c ${MPY_PORT_DIR}/builtins/modmusic.c @@ -51,6 +53,7 @@ set(MICROPY_SOURCE_BOARD ${MPY_PORT_DIR}/builtins/modframebuf.c ${MPY_PORT_DIR}/builtins/modaudio.c ${MPY_PORT_DIR}/builtins/modrfid.c + ${MPY_PORT_DIR}/builtins/modtest.c # ${MPY_PORT_DIR}/builtins/mod_audio/audio_player.c # ${MPY_PORT_DIR}/builtins/mod_audio/audio_recorder.c # ${MPY_PORT_DIR}/builtins/mod_audio/vfs_stream.c @@ -60,6 +63,8 @@ set(MICROPY_SOURCE_BOARD set(MICROPY_SOURCE_BOARD_DIR ${MPY_PORT_DIR}/drivers ${MPY_PORT_DIR}/drivers/audio/include + ${MPY_PORT_DIR}/drivers/tts + ${MPY_PORT_DIR}/drivers/esp_tts_wav ${MPY_PORT_DIR}/lib ${MPY_PORT_DIR}/builtins ${MPY_PORT_DIR}/boards diff --git a/port/boards/labplus_for_xuejing/partitions-8MiB.csv b/port/boards/labplus_for_xuejing/partitions-8MiB.csv index 3c955e12..4a8942d4 100644 --- a/port/boards/labplus_for_xuejing/partitions-8MiB.csv +++ b/port/boards/labplus_for_xuejing/partitions-8MiB.csv @@ -1,10 +1,9 @@ # Notes: the offset of the partition table itself is set in # $IDF_PATH/components/partition_table/Kconfig.projbuild. -# Name, Type, SubType, Offset, Size, Flags -nvs, data, nvs, 0x9000, 0x6000, -phy_init, data, phy, 0xf000, 0x1000, -factory, app, factory, 0x10000, 0x200000, -vfs, data, spiffs, 0x210000, 0x1f0000, -font, data, nvs, 0x400000, 0x200000, -user_nvs, data, nvs, 0x7fc000, 0x4000, - +# Name, Type, SubType, Offset, Size, Flags +nvs, data, nvs, 0x9000, 0x6000, +phy_init, data, phy, 0xf000, 0x1000, +factory, app, factory, 0x10000, 0x2f0000, +vfs, data, fat, 0x300000, 0x100000, +font, data, fat, 0x400000, 0x3fc000, +user_nvs, data, nvs, 0x7fc000, 0x4000, diff --git a/port/builtins/modtest.c b/port/builtins/modtest.c new file mode 100644 index 00000000..aaf41bb8 --- /dev/null +++ b/port/builtins/modtest.c @@ -0,0 +1,39 @@ +#include "py/obj.h" +#include "py/runtime.h" +#include "esp_tts.h" +#include "tts_pcm.h" + +#ifndef STATIC +#define STATIC static +#endif + +// MicroPython 绑定函数 +STATIC mp_obj_t mp_tts_generate(mp_obj_t text_obj) { + // 获取文本参数 + const char* text = mp_obj_str_get_str(text_obj); + + // 直接调用 text_to_speech 播放音频 + text_to_speech(text); + + return mp_const_none; // 播放完成后返回 None +} + +STATIC MP_DEFINE_CONST_FUN_OBJ_1(mp_tts_generate_obj, mp_tts_generate); + + +// 模块定义 +STATIC const mp_rom_map_elem_t tts_module_globals_table[] = { + { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_tts) }, + { MP_ROM_QSTR(MP_QSTR_generate), MP_ROM_PTR(&mp_tts_generate_obj) }, +}; +STATIC MP_DEFINE_CONST_DICT(tts_module_globals, tts_module_globals_table); + +// 模块结构体 +const mp_obj_module_t tts_module = { + .base = { &mp_type_module }, + .globals = (mp_obj_dict_t*)&tts_module_globals, +}; + +// 注册模块 +MP_REGISTER_MODULE(MP_QSTR_tts, tts_module); + diff --git a/port/builtins/modtest.c.bk b/port/builtins/modtest.c.bk new file mode 100644 index 00000000..7d0c7b84 --- /dev/null +++ b/port/builtins/modtest.c.bk @@ -0,0 +1,44 @@ +#include "py/obj.h" +#include "py/runtime.h" +#include "esp_tts.h" +#include "tts_pcm.h" + +#ifndef STATIC +#define STATIC static +#endif + +// MicroPython绑定函数 +STATIC mp_obj_t mp_tts_generate(mp_obj_t text_obj) { + // 获取参数 + const char* text = mp_obj_str_get_str(text_obj); + + // 生成 PCM 数据 + int pcm_len = 0; + short* pcm_data = text_to_pcm(text, &pcm_len); + + if (!pcm_data || pcm_len == 0) { + return mp_const_none; // 失败返回 None + } + + // 返回 PCM 数据为字节数组 + return mp_obj_new_bytearray_by_ref(pcm_len * sizeof(short), pcm_data); +} + +STATIC MP_DEFINE_CONST_FUN_OBJ_1(mp_tts_generate_obj, mp_tts_generate); + +// 模块定义 +STATIC const mp_rom_map_elem_t tts_module_globals_table[] = { + { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_tts) }, + { MP_ROM_QSTR(MP_QSTR_generate), MP_ROM_PTR(&mp_tts_generate_obj) }, +}; +STATIC MP_DEFINE_CONST_DICT(tts_module_globals, tts_module_globals_table); + +// 模块结构体 +const mp_obj_module_t tts_module = { + .base = { &mp_type_module }, + .globals = (mp_obj_dict_t*)&tts_module_globals, +}; + +// 注册模块 +MP_REGISTER_MODULE(MP_QSTR_tts, tts_module); + diff --git a/port/drivers/esp_tts_wav/wav_decoder.c b/port/drivers/esp_tts_wav/wav_decoder.c new file mode 100644 index 00000000..f33946bf --- /dev/null +++ b/port/drivers/esp_tts_wav/wav_decoder.c @@ -0,0 +1,179 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 2009 Martin Storsjo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ + +#include "wav_decoder.h" +#include +#include +#include +#include + +#define TAG(a, b, c, d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d)) + +struct wav_decoder { + FILE *wav; + uint32_t data_length; + + int format; + int sample_rate; + int bits_per_sample; + int channels; + int byte_rate; + int block_align; +}; + +static uint32_t read_tag(struct wav_decoder* wr) { + uint32_t tag = 0; + tag = (tag << 8) | fgetc(wr->wav); + tag = (tag << 8) | fgetc(wr->wav); + tag = (tag << 8) | fgetc(wr->wav); + tag = (tag << 8) | fgetc(wr->wav); + return tag; +} + +static uint32_t read_int32(struct wav_decoder* wr) { + uint32_t value = 0; + value |= fgetc(wr->wav) << 0; + value |= fgetc(wr->wav) << 8; + value |= fgetc(wr->wav) << 16; + value |= fgetc(wr->wav) << 24; + return value; +} + +static uint16_t read_int16(struct wav_decoder* wr) { + uint16_t value = 0; + value |= fgetc(wr->wav) << 0; + value |= fgetc(wr->wav) << 8; + return value; +} + +void* wav_decoder_open(const char *filename) { + struct wav_decoder* wr = (struct wav_decoder*) malloc(sizeof(*wr)); + long data_pos = 0; + memset(wr, 0, sizeof(*wr)); + + wr->wav = fopen(filename, "rb"); + if (wr->wav == NULL) { + free(wr); + return NULL; + } + + while (1) { + uint32_t tag, tag2, length; + tag = read_tag(wr); + if (feof(wr->wav)) + break; + length = read_int32(wr); + if (tag != TAG('R', 'I', 'F', 'F') || length < 4) { + fseek(wr->wav, length, SEEK_CUR); + continue; + } + tag2 = read_tag(wr); + length -= 4; + if (tag2 != TAG('W', 'A', 'V', 'E')) { + fseek(wr->wav, length, SEEK_CUR); + continue; + } + // RIFF chunk found, iterate through it + while (length >= 8) { + uint32_t subtag, sublength; + subtag = read_tag(wr); + if (feof(wr->wav)) + break; + sublength = read_int32(wr); + length -= 8; + if (length < sublength) + break; + if (subtag == TAG('f', 'm', 't', ' ')) { + if (sublength < 16) { + // Insufficient data for 'fmt ' + break; + } + wr->format = read_int16(wr); + wr->channels = read_int16(wr); + wr->sample_rate = read_int32(wr); + wr->byte_rate = read_int32(wr); + wr->block_align = read_int16(wr); + wr->bits_per_sample = read_int16(wr); + } else if (subtag == TAG('d', 'a', 't', 'a')) { + data_pos = ftell(wr->wav); + wr->data_length = sublength; + fseek(wr->wav, sublength, SEEK_CUR); + } else { + fseek(wr->wav, sublength, SEEK_CUR); + } + length -= sublength; + } + if (length > 0) { + // Bad chunk? + fseek(wr->wav, length, SEEK_CUR); + } + } + fseek(wr->wav, data_pos, SEEK_SET); + return wr; +} + +void wav_decoder_close(void* obj) { + struct wav_decoder* wr = (struct wav_decoder*) obj; + fclose(wr->wav); + free(wr); +} + +int wav_decoder_get_header(void* obj, int* format, int* channels, int* sample_rate, int* bits_per_sample, unsigned int* data_length) { + struct wav_decoder* wr = (struct wav_decoder*) obj; + if (format) + *format = wr->format; + if (channels) + *channels = wr->channels; + if (sample_rate) + *sample_rate = wr->sample_rate; + if (bits_per_sample) + *bits_per_sample = wr->bits_per_sample; + if (data_length) + *data_length = wr->data_length; + return wr->format && wr->sample_rate; +} + +int wav_decoder_run(void* obj, unsigned char* data, unsigned int length) { + struct wav_decoder* wr = (struct wav_decoder*) obj; + int n; + if (wr->wav == NULL) + return -1; + if (length > wr->data_length) + length = wr->data_length; + n = fread(data, 1, length, wr->wav); + wr->data_length -= length; + return n; +} + +int wav_decoder_get_channel(void* obj) { + + struct wav_decoder* wr = (struct wav_decoder*) obj; + return wr->channels; +} + +int wav_decoder_get_sample_rate(void* obj) { + + struct wav_decoder* wr = (struct wav_decoder*) obj; + return wr->sample_rate; +} + +int wav_decoder_get_data_length(void* obj) { + + struct wav_decoder* wr = (struct wav_decoder*) obj; + return wr->data_length; +} \ No newline at end of file diff --git a/port/drivers/esp_tts_wav/wav_decoder.h b/port/drivers/esp_tts_wav/wav_decoder.h new file mode 100644 index 00000000..9e8c8dd2 --- /dev/null +++ b/port/drivers/esp_tts_wav/wav_decoder.h @@ -0,0 +1,39 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 2009 Martin Storsjo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ + +#ifndef WAV_DECODER_H +#define WAV_DECODER_H + +#ifdef __cplusplus +extern "C" { +#endif + +void* wav_decoder_open(const char *filename); +void wav_decoder_close(void* obj); +int wav_decoder_get_header(void* obj, int* format, int* channels, int* sample_rate, int* bits_per_sample, unsigned int* data_length); +int wav_decoder_run(void* obj, unsigned char* data, unsigned int length); +int wav_decoder_get_sample_rate(void* obj); +int wav_decoder_get_channel(void* obj); +int wav_decoder_get_data_length(void* obj); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/port/drivers/esp_tts_wav/wav_encoder.c b/port/drivers/esp_tts_wav/wav_encoder.c new file mode 100644 index 00000000..aab8f550 --- /dev/null +++ b/port/drivers/esp_tts_wav/wav_encoder.c @@ -0,0 +1,111 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 2009 Martin Storsjo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ + +#include "wav_encoder.h" +#include +#include +#include +#include + +struct wav_encoder { + FILE *wav; + int data_length; + + int sample_rate; + int bits_per_sample; + int channels; +}; + +static void write_string(struct wav_encoder* ww, const char *str) { + fputc(str[0], ww->wav); + fputc(str[1], ww->wav); + fputc(str[2], ww->wav); + fputc(str[3], ww->wav); +} + +static void write_int32(struct wav_encoder* ww, int value) { + fputc((value >> 0) & 0xff, ww->wav); + fputc((value >> 8) & 0xff, ww->wav); + fputc((value >> 16) & 0xff, ww->wav); + fputc((value >> 24) & 0xff, ww->wav); +} + +static void write_int16(struct wav_encoder* ww, int value) { + fputc((value >> 0) & 0xff, ww->wav); + fputc((value >> 8) & 0xff, ww->wav); +} + +static void write_header(struct wav_encoder* ww, int length) { + int bytes_per_frame, bytes_per_sec; + write_string(ww, "RIFF"); + write_int32(ww, 4 + 8 + 16 + 8 + length); + write_string(ww, "WAVE"); + + write_string(ww, "fmt "); + write_int32(ww, 16); + + bytes_per_frame = ww->bits_per_sample/8*ww->channels; + bytes_per_sec = bytes_per_frame*ww->sample_rate; + write_int16(ww, 1); // Format + write_int16(ww, ww->channels); // Channels + write_int32(ww, ww->sample_rate); // Samplerate + write_int32(ww, bytes_per_sec); // Bytes per sec + write_int16(ww, bytes_per_frame); // Bytes per frame + write_int16(ww, ww->bits_per_sample); // Bits per sample + + write_string(ww, "data"); + write_int32(ww, length); +} + +void* wav_encoder_open(const char *filename, int sample_rate, int bits_per_sample, int channels) { + struct wav_encoder* ww = (struct wav_encoder*) malloc(sizeof(*ww)); + memset(ww, 0, sizeof(*ww)); + ww->wav = fopen(filename, "wb"); + if (ww->wav == NULL) { + free(ww); + return NULL; + } + ww->data_length = 0; + ww->sample_rate = sample_rate; + ww->bits_per_sample = bits_per_sample; + ww->channels = channels; + + write_header(ww, ww->data_length); + return ww; +} + +void wav_encoder_close(void* obj) { + struct wav_encoder* ww = (struct wav_encoder*) obj; + if (ww->wav == NULL) { + free(ww); + return; + } + fseek(ww->wav, 0, SEEK_SET); + write_header(ww, ww->data_length); + fclose(ww->wav); + free(ww); +} + +void wav_encoder_run(void* obj, const unsigned char* data, int length) { + struct wav_encoder* ww = (struct wav_encoder*) obj; + if (ww->wav == NULL) + return; + fwrite(data, length, 1, ww->wav); + ww->data_length += length; +} + diff --git a/port/drivers/esp_tts_wav/wav_encoder.h b/port/drivers/esp_tts_wav/wav_encoder.h new file mode 100644 index 00000000..4fafd271 --- /dev/null +++ b/port/drivers/esp_tts_wav/wav_encoder.h @@ -0,0 +1,35 @@ +/* ------------------------------------------------------------------ + * Copyright (C) 2009 Martin Storsjo + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + * ------------------------------------------------------------------- + */ + +#ifndef WAV_ENCODER_H +#define WAV_ENCODER_H + +#ifdef __cplusplus +extern "C" { +#endif + +void* wav_encoder_open(const char *filename, int sample_rate, int bits_per_sample, int channels); +void wav_encoder_close(void* obj); +void wav_encoder_run(void* obj, const unsigned char* data, int length); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/port/drivers/tts/tts_pcm.c b/port/drivers/tts/tts_pcm.c new file mode 100644 index 00000000..671530f7 --- /dev/null +++ b/port/drivers/tts/tts_pcm.c @@ -0,0 +1,90 @@ +#include +#include +#include + +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "esp_system.h" +#include "freertos/semphr.h" +#include "esp_tts.h" +#include "esp_tts_voice_xiaole.h" +#include "esp_tts_voice_template.h" +#include "esp_tts_player.h" +#include "ringbuf.h" + +#include "esp_partition.h" +#include "esp_idf_version.h" +#include "bsp_audio.h" + +typedef struct { + char text[256]; + SemaphoreHandle_t done_semaphore; +} tts_task_param_t; + +void tts_task(void* param) { + tts_task_param_t* task_param = (tts_task_param_t*)param; + const esp_partition_t* part = esp_partition_find_first(ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, "font"); + if (part == NULL) { + printf("Couldn't find voice data partition!\n"); + vTaskDelete(NULL); + return; + } + + const void* voicedata; + esp_partition_mmap_handle_t mmap; + esp_err_t err = esp_partition_mmap(part, 0, part->size, ESP_PARTITION_MMAP_DATA, (const void**)&voicedata, &mmap); + if (err != ESP_OK) { + printf("Couldn't map voice data partition!\n"); + vTaskDelete(NULL); + return; + } + + esp_tts_voice_t* voice = esp_tts_voice_set_init(&esp_tts_voice_template, (int16_t*)voicedata); + if (voice == NULL) { + printf("TTS voice init failed!\n"); + return; + } + esp_tts_handle_t* tts_handle = esp_tts_create(voice); + if (!tts_handle || !task_param->text) { + printf("Invalid TTS handle or text!\n"); + return; + } + + if (esp_tts_parse_chinese(tts_handle, task_param->text)) { + int len[1]={0}; + bsp_codec_dev_create(); + //bsp_audio_set_play_vol(50); + bsp_codec_dev_open(16000,1,256000); + do { + short* pcm = esp_tts_stream_play(tts_handle, len, 1); + bsp_audio_play(pcm, len[0] * 2, portMAX_DELAY); + printf("data:%d \n", len[0]); + vTaskDelay(pdMS_TO_TICKS(10)); + } while (len[0] > 0); + } + + esp_tts_stream_reset(tts_handle); + xSemaphoreGive(task_param->done_semaphore); + vTaskDelete(NULL); +} + +void text_to_speech(const char* text) { + static SemaphoreHandle_t done_semaphore = NULL; + if (done_semaphore == NULL) { + done_semaphore = xSemaphoreCreateBinary(); + } + + tts_task_param_t* task_param = malloc(sizeof(tts_task_param_t)); + if (!task_param) { + printf("Failed to allocate memory for task parameters\n"); + return; + } + + strncpy(task_param->text, text, sizeof(task_param->text) - 1); + task_param->text[sizeof(task_param->text) - 1] = '\0'; + task_param->done_semaphore = done_semaphore; + + xTaskCreatePinnedToCore(tts_task, "tts_task", 8192, task_param, 5, NULL, 0); + xSemaphoreTake(done_semaphore, portMAX_DELAY); + free(task_param); +} diff --git a/port/drivers/tts/tts_pcm.c.bk b/port/drivers/tts/tts_pcm.c.bk new file mode 100644 index 00000000..c7e2538a --- /dev/null +++ b/port/drivers/tts/tts_pcm.c.bk @@ -0,0 +1,116 @@ +#include +#include +#include + +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "esp_system.h" +#include "freertos/semphr.h" +#include "esp_tts.h" +#include "esp_tts_voice_xiaole.h" +#include "esp_tts_voice_template.h" +#include "esp_tts_player.h" +#include "ringbuf.h" + +#include "esp_partition.h" +#include "esp_idf_version.h" + +typedef struct { + char text[256]; // 存储待转换的文本 + short* pcm_data; // 存储 PCM 数据 + SemaphoreHandle_t done_semaphore; // 任务完成信号量 +} tts_task_param_t; + +static short* pcm_buffer = NULL; +static int pcm_data_len = 0; + +void tts_task(void* param) { + tts_task_param_t* task_param = (tts_task_param_t*)param; + + const esp_partition_t* part = esp_partition_find_first(ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, "font"); + if (part == NULL) { + printf("Couldn't find voice data partition!\n"); + vTaskDelete(NULL); + return; + } + + const void* voicedata; +#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0) + esp_partition_mmap_handle_t mmap; + esp_err_t err = esp_partition_mmap(part, 0, part->size, ESP_PARTITION_MMAP_DATA, (const void**)&voicedata, &mmap); +#else + spi_flash_mmap_handle_t mmap; + esp_err_t err = esp_partition_mmap(part, 0, part->size, SPI_FLASH_MMAP_DATA, &voicedata, &mmap); +#endif + if (err != ESP_OK) { + printf("Couldn't map voice data partition!\n"); + vTaskDelete(NULL); + return; + } + + esp_tts_voice_t* voice = esp_tts_voice_set_init(&esp_tts_voice_template, (int16_t*)voicedata); + if (voice == NULL) { + printf("TTS voice init failed!\n"); + return; + } + esp_tts_handle_t* tts_handle = esp_tts_create(voice); + if (!tts_handle || !task_param || !task_param->text) { + printf("Invalid TTS handle or text!\n"); + return; + } + + pcm_data_len = 0; + if (!pcm_buffer) { + pcm_buffer = malloc(16000 * sizeof(short)); + } + if (!pcm_buffer) { + printf("Failed to allocate PCM buffer\n"); + vTaskDelete(NULL); + return; + } + printf("Parsing text: %s\n", task_param->text); + printf("TTS handle: %p\n", tts_handle); + if (esp_tts_parse_chinese(tts_handle, task_param->text)) { + int len; + int total_len = 0; + printf("Start creat voice\n"); + do { + short* pcm = esp_tts_stream_play(tts_handle, &len, 3); + if (len > 0) { + //if (total_len + len > 16000) break; // 防止缓冲区溢出 + memcpy(pcm_buffer + total_len, pcm, len * sizeof(short)); + total_len += len; + } + vTaskDelay(pdMS_TO_TICKS(10)); // 让出 CPU,避免 WDT 复位 + } while (len > 0); + pcm_data_len = total_len; + } + + esp_tts_stream_reset(tts_handle); + + // 释放任务 + xSemaphoreGive(task_param->done_semaphore); + vTaskDelete(NULL); +} + +// 供外部调用的函数 +short* text_to_pcm(const char* text, int* out_len) { + static SemaphoreHandle_t done_semaphore = NULL; + if (done_semaphore == NULL) { + done_semaphore = xSemaphoreCreateBinary(); + } + + tts_task_param_t task_param; + strncpy(task_param.text, text, sizeof(task_param.text) - 1); + task_param.text[sizeof(task_param.text) - 1] = '\0'; + task_param.pcm_data = NULL; + task_param.done_semaphore = done_semaphore; + + // 启动任务 + xTaskCreatePinnedToCore(tts_task, "tts_task", 8192, &task_param, 5, NULL, 0); + + // 等待任务完成 + xSemaphoreTake(done_semaphore, portMAX_DELAY); + *out_len = pcm_data_len; + return pcm_buffer; +} diff --git a/port/drivers/tts/tts_pcm.h b/port/drivers/tts/tts_pcm.h new file mode 100644 index 00000000..2d0899b0 --- /dev/null +++ b/port/drivers/tts/tts_pcm.h @@ -0,0 +1,9 @@ +#pragma once +#ifdef __cplusplus +extern "C" { +#endif +short* text_to_speech(const char* text); +#ifdef __cplusplus +} +#endif + diff --git a/port/drivers/tts/tts_pcm.h.bk b/port/drivers/tts/tts_pcm.h.bk new file mode 100644 index 00000000..ffc29e7e --- /dev/null +++ b/port/drivers/tts/tts_pcm.h.bk @@ -0,0 +1,9 @@ +#pragma once +#ifdef __cplusplus +extern "C" { +#endif +short* text_to_pcm(const char* text, int* out_len); +#ifdef __cplusplus +} +#endif + diff --git a/port/makeimg.py b/port/makeimg.py index 3d8e0a84..5e2b9bd8 100755 --- a/port/makeimg.py +++ b/port/makeimg.py @@ -104,7 +104,8 @@ def load_partition_table(filename): arg_application_bin = "micropython.bin" arg_output_bin = bin_name arg_output_uf2= uf2_name -arg_font_bin = "../Noto_Sans_CJK_SC_Light16.bin" +# arg_font_bin = "../Noto_Sans_CJK_SC_Light16.bin" +arg_font_bin = "../components/esp-sr/esp-tts/esp_tts_chinese/esp_tts_voice_data_xiaoxin.dat" # Load required sdkconfig values. idf_target = load_sdkconfig_str_value(arg_sdkconfig, "IDF_TARGET", "").upper() From 8e34e526bde949ff667e5e097fd2a168eb7fc153 Mon Sep 17 00:00:00 2001 From: HenTaiCJN <1197845552@qq.com> Date: Sun, 16 Feb 2025 17:20:45 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=92=AD=E6=94=BE?= =?UTF-8?q?=E5=A3=B0=E9=9F=B3=E5=87=BD=E6=95=B0=E8=B0=83=E7=94=A8=E6=B5=81?= =?UTF-8?q?=E7=A8=8B=EF=BC=8C=E4=BF=AE=E6=94=B9=E6=94=B6=E9=9F=B3=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=E4=B8=BA=E9=98=BB=E5=A1=9E=EF=BC=8C=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E6=96=B0=E7=9A=84=E6=92=AD=E6=94=BE=E5=87=BD=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../labplus_for_xuejing/bsp_audio_board.c | 62 ++++++ .../labplus_for_xuejing/mpconfigboard.cmake | 2 - port/builtins/modaudio.c | 3 +- port/drivers/audio/include/bsp_audio.h | 1 + port/drivers/audio/include/recorder.h | 2 + port/drivers/audio/include/wav_codec.h | 3 + port/drivers/audio/recorder.c | 6 + port/drivers/audio/wav_codec.c | 6 +- port/drivers/esp_tts_wav/wav_decoder.c | 179 ------------------ port/drivers/esp_tts_wav/wav_decoder.h | 39 ---- port/drivers/esp_tts_wav/wav_encoder.c | 111 ----------- port/drivers/esp_tts_wav/wav_encoder.h | 35 ---- port/drivers/tts/tts_pcm.c | 16 +- 13 files changed, 90 insertions(+), 375 deletions(-) delete mode 100644 port/drivers/esp_tts_wav/wav_decoder.c delete mode 100644 port/drivers/esp_tts_wav/wav_decoder.h delete mode 100644 port/drivers/esp_tts_wav/wav_encoder.c delete mode 100644 port/drivers/esp_tts_wav/wav_encoder.h diff --git a/port/boards/labplus_for_xuejing/bsp_audio_board.c b/port/boards/labplus_for_xuejing/bsp_audio_board.c index 67dc2371..71d92039 100644 --- a/port/boards/labplus_for_xuejing/bsp_audio_board.c +++ b/port/boards/labplus_for_xuejing/bsp_audio_board.c @@ -53,6 +53,11 @@ static const audio_codec_ctrl_if_t *codec_ctrl_if = NULL; static const audio_codec_if_t *codec_if = NULL; static esp_codec_dev_handle_t codec_dev = NULL; +static int s_play_sample_rate = 16000; +static int s_play_channel_format = 1; +static int s_bits_per_chan = 16; + + static esp_err_t bsp_i2s_init(uint32_t sample_rate, int channel_format, int bits_per_sample) { esp_err_t ret_val = ESP_OK; @@ -274,6 +279,63 @@ esp_err_t bsp_audio_play(const int8_t *data, int length, TickType_t ticks_to_wa return ret; } +esp_err_t bsp_audio_play2(const int16_t* data, int length, TickType_t ticks_to_wait) +{ + size_t bytes_write = 0; + esp_err_t ret = ESP_OK; + if (!codec_dev) { + return ESP_FAIL; + } + + int out_length= length; + int audio_time = 1; + audio_time *= (16000 / s_play_sample_rate); + audio_time *= (2 / s_play_channel_format); + + int *data_out = NULL; + if (s_bits_per_chan != 32) { + out_length = length * 2; + data_out = malloc(out_length); + for (int i = 0; i < length / sizeof(int16_t); i++) { + int ret = data[i]; + data_out[i] = ret << 16; + } + } + + int *data_out_1 = NULL; + if (s_play_channel_format != 2 || s_play_sample_rate != 16000) { + out_length *= audio_time; + data_out_1 = malloc(out_length); + int *tmp_data = NULL; + if (data_out != NULL) { + tmp_data = data_out; + } else { + tmp_data = data; + } + + for (int i = 0; i < out_length / (audio_time * sizeof(int)); i++) { + for (int j = 0; j < audio_time; j++) { + data_out_1[audio_time * i + j] = tmp_data[i]; + } + } + if (data_out != NULL) { + free(data_out); + data_out = NULL; + } + } + + if (data_out != NULL) { + ret = esp_codec_dev_write(codec_dev, (void *)data_out, out_length); + free(data_out); + } else if (data_out_1 != NULL) { + ret = esp_codec_dev_write(codec_dev, (void *)data_out_1, out_length); + free(data_out_1); + } else { + ret = esp_codec_dev_write(codec_dev, (void *)data, length); + } + + return ret; +} /* 做为afe is_get_raw_channel = true :16K 16bit 左声道 + 右声道,无ref, is_get_raw_channel = false :16K 16bit 左声道 + 右声道 + 一路ref,*/ diff --git a/port/boards/labplus_for_xuejing/mpconfigboard.cmake b/port/boards/labplus_for_xuejing/mpconfigboard.cmake index 4984bd5b..3f50d884 100644 --- a/port/boards/labplus_for_xuejing/mpconfigboard.cmake +++ b/port/boards/labplus_for_xuejing/mpconfigboard.cmake @@ -43,7 +43,6 @@ set(MICROPY_SOURCE_BOARD ${MPY_PORT_DIR}/drivers/startup/startup.c ${MPY_PORT_DIR}/drivers/rfid/mfrc522.c ${MPY_PORT_DIR}/drivers/tts/tts_pcm.c - ${MPY_PORT_DIR}/drivers/esp_tts_wav/wav_encoder.c ${MPY_PORT_DIR}/lib/utils/pyexec.c ${MPY_PORT_DIR}/builtins/modmusictunes.c ${MPY_PORT_DIR}/builtins/modmusic.c @@ -64,7 +63,6 @@ set(MICROPY_SOURCE_BOARD_DIR ${MPY_PORT_DIR}/drivers ${MPY_PORT_DIR}/drivers/audio/include ${MPY_PORT_DIR}/drivers/tts - ${MPY_PORT_DIR}/drivers/esp_tts_wav ${MPY_PORT_DIR}/lib ${MPY_PORT_DIR}/builtins ${MPY_PORT_DIR}/boards diff --git a/port/builtins/modaudio.c b/port/builtins/modaudio.c index 8be6a369..61be6b44 100755 --- a/port/builtins/modaudio.c +++ b/port/builtins/modaudio.c @@ -135,6 +135,7 @@ static MP_DEFINE_CONST_FUN_OBJ_0(audio_loudness_obj, audio_loudness); static mp_obj_t audio_record(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) { + audio_init(); enum { ARG_file_name, ARG_record_time, ARG_bits_per_sample, ARG_channels, ARG_sampleRate}; static const mp_arg_t allowed_args[] = { { MP_QSTR_file_name, MP_ARG_REQUIRED | MP_ARG_OBJ }, @@ -193,4 +194,4 @@ const mp_obj_module_t mp_module_audio = { }; MP_REGISTER_MODULE(MP_QSTR_audio, mp_module_audio); -// #endif //#if MICROPY_PY_AUDIO \ No newline at end of file +// #endif //#if MICROPY_PY_AUDIO diff --git a/port/drivers/audio/include/bsp_audio.h b/port/drivers/audio/include/bsp_audio.h index 916c72af..5483936a 100644 --- a/port/drivers/audio/include/bsp_audio.h +++ b/port/drivers/audio/include/bsp_audio.h @@ -67,6 +67,7 @@ esp_err_t bsp_codec_dev_open(int sample_rate, int channel_format, int bits_per_s esp_err_t bsp_codec_dev_close(void); esp_err_t bsp_audio_play(const int8_t *data, int length, TickType_t ticks_to_wait); +esp_err_t bsp_audio_play2(const int16_t *data, int length, TickType_t ticks_to_wait); /** * @brief Get the record pcm data. diff --git a/port/drivers/audio/include/recorder.h b/port/drivers/audio/include/recorder.h index 4eb59c52..024398dc 100644 --- a/port/drivers/audio/include/recorder.h +++ b/port/drivers/audio/include/recorder.h @@ -1,6 +1,7 @@ #ifndef __RECORDER_ #define __RECORDER_ #include "freertos/FreeRTOS.h" +#include "freertos/semphr.h" #include "freertos/event_groups.h" #include "ringbuf.h" #include "py/obj.h" @@ -27,5 +28,6 @@ typedef struct void recorder_record(const char *filename, wav_fmt_t fmt, int time); extern recorder_handle_t *recorder; +extern SemaphoreHandle_t xBinarySemaphore; #endif diff --git a/port/drivers/audio/include/wav_codec.h b/port/drivers/audio/include/wav_codec.h index 72ef9281..fc77cdef 100644 --- a/port/drivers/audio/include/wav_codec.h +++ b/port/drivers/audio/include/wav_codec.h @@ -23,6 +23,9 @@ #include "py/obj.h" #include "freertos/ringbuf.h" #include "wave_head.h" +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" + #ifdef __cplusplus extern "C" { diff --git a/port/drivers/audio/recorder.c b/port/drivers/audio/recorder.c index 1704bbe7..60002434 100755 --- a/port/drivers/audio/recorder.c +++ b/port/drivers/audio/recorder.c @@ -11,6 +11,8 @@ #include #include #include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/semphr.h" #include "esp_system.h" #include "esp_log.h" #include "py/stream.h" @@ -23,6 +25,7 @@ #include "player.h" #include "msg.h" + // #define TAG "recorder" recorder_handle_t *recorder = NULL; @@ -65,7 +68,10 @@ void recorder_record(const char *filename, wav_fmt_t fmt, int time) recorder->total_frames = recorder->time * (recorder->wav_fmt.sampleRate * recorder->wav_fmt.channels * recorder->wav_fmt.bits_per_sample / 8) / READ_RINGBUF_BLOCK_SIZE; // ESP_LOGE(TAG, "record total frame: %d, time: %d", recorder->total_frames, recorder->time); recorder->file_uri = filename; + xBinarySemaphore = xSemaphoreCreateBinary(); xTaskCreatePinnedToCore(&stream_i2s_read_task, "stream_i2s_read_task", 4 * 1024, (void*)recorder, 8, NULL, CORE_NUM1); + xSemaphoreTake(xBinarySemaphore, portMAX_DELAY); + vSemaphoreDelete(xBinarySemaphore); } void record_deinit(void) diff --git a/port/drivers/audio/wav_codec.c b/port/drivers/audio/wav_codec.c index bf883f16..6a919be6 100644 --- a/port/drivers/audio/wav_codec.c +++ b/port/drivers/audio/wav_codec.c @@ -19,6 +19,7 @@ #include "esp_log.h" #include "freertos/FreeRTOS.h" #include "freertos/task.h" +#include "freertos/semphr.h" #include #include #include @@ -29,6 +30,8 @@ #include "player.h" #include "recorder.h" +SemaphoreHandle_t xBinarySemaphore; // 定义信号量 + static const char *TAG = "wav_codec"; wav_codec_t *wav_file_open(const char *path_in, int mode) @@ -99,7 +102,7 @@ void wav_file_read_task(void *arg) if(!wav_codec){ break; } player->wav_codec = wav_codec; - if(!wav_codec_ringbuff){ + if(!wav_codec_ringbuff){/ wav_codec_ringbuff = xRingbufferCreate(STREAM_OUT_RINGBUF_SIZE, RINGBUF_TYPE_BYTEBUF); if(!wav_codec_ringbuff){ break; } }else{ @@ -291,5 +294,6 @@ void stream_i2s_read_task(void *arg) free(stream_buff); } // ESP_LOGE(TAG, "stream in task end, RAM left: %ld", esp_get_free_heap_size()); + xSemaphoreGive(xBinarySemaphore); vTaskDelete(NULL); } diff --git a/port/drivers/esp_tts_wav/wav_decoder.c b/port/drivers/esp_tts_wav/wav_decoder.c deleted file mode 100644 index f33946bf..00000000 --- a/port/drivers/esp_tts_wav/wav_decoder.c +++ /dev/null @@ -1,179 +0,0 @@ -/* ------------------------------------------------------------------ - * Copyright (C) 2009 Martin Storsjo - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. - * See the License for the specific language governing permissions - * and limitations under the License. - * ------------------------------------------------------------------- - */ - -#include "wav_decoder.h" -#include -#include -#include -#include - -#define TAG(a, b, c, d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d)) - -struct wav_decoder { - FILE *wav; - uint32_t data_length; - - int format; - int sample_rate; - int bits_per_sample; - int channels; - int byte_rate; - int block_align; -}; - -static uint32_t read_tag(struct wav_decoder* wr) { - uint32_t tag = 0; - tag = (tag << 8) | fgetc(wr->wav); - tag = (tag << 8) | fgetc(wr->wav); - tag = (tag << 8) | fgetc(wr->wav); - tag = (tag << 8) | fgetc(wr->wav); - return tag; -} - -static uint32_t read_int32(struct wav_decoder* wr) { - uint32_t value = 0; - value |= fgetc(wr->wav) << 0; - value |= fgetc(wr->wav) << 8; - value |= fgetc(wr->wav) << 16; - value |= fgetc(wr->wav) << 24; - return value; -} - -static uint16_t read_int16(struct wav_decoder* wr) { - uint16_t value = 0; - value |= fgetc(wr->wav) << 0; - value |= fgetc(wr->wav) << 8; - return value; -} - -void* wav_decoder_open(const char *filename) { - struct wav_decoder* wr = (struct wav_decoder*) malloc(sizeof(*wr)); - long data_pos = 0; - memset(wr, 0, sizeof(*wr)); - - wr->wav = fopen(filename, "rb"); - if (wr->wav == NULL) { - free(wr); - return NULL; - } - - while (1) { - uint32_t tag, tag2, length; - tag = read_tag(wr); - if (feof(wr->wav)) - break; - length = read_int32(wr); - if (tag != TAG('R', 'I', 'F', 'F') || length < 4) { - fseek(wr->wav, length, SEEK_CUR); - continue; - } - tag2 = read_tag(wr); - length -= 4; - if (tag2 != TAG('W', 'A', 'V', 'E')) { - fseek(wr->wav, length, SEEK_CUR); - continue; - } - // RIFF chunk found, iterate through it - while (length >= 8) { - uint32_t subtag, sublength; - subtag = read_tag(wr); - if (feof(wr->wav)) - break; - sublength = read_int32(wr); - length -= 8; - if (length < sublength) - break; - if (subtag == TAG('f', 'm', 't', ' ')) { - if (sublength < 16) { - // Insufficient data for 'fmt ' - break; - } - wr->format = read_int16(wr); - wr->channels = read_int16(wr); - wr->sample_rate = read_int32(wr); - wr->byte_rate = read_int32(wr); - wr->block_align = read_int16(wr); - wr->bits_per_sample = read_int16(wr); - } else if (subtag == TAG('d', 'a', 't', 'a')) { - data_pos = ftell(wr->wav); - wr->data_length = sublength; - fseek(wr->wav, sublength, SEEK_CUR); - } else { - fseek(wr->wav, sublength, SEEK_CUR); - } - length -= sublength; - } - if (length > 0) { - // Bad chunk? - fseek(wr->wav, length, SEEK_CUR); - } - } - fseek(wr->wav, data_pos, SEEK_SET); - return wr; -} - -void wav_decoder_close(void* obj) { - struct wav_decoder* wr = (struct wav_decoder*) obj; - fclose(wr->wav); - free(wr); -} - -int wav_decoder_get_header(void* obj, int* format, int* channels, int* sample_rate, int* bits_per_sample, unsigned int* data_length) { - struct wav_decoder* wr = (struct wav_decoder*) obj; - if (format) - *format = wr->format; - if (channels) - *channels = wr->channels; - if (sample_rate) - *sample_rate = wr->sample_rate; - if (bits_per_sample) - *bits_per_sample = wr->bits_per_sample; - if (data_length) - *data_length = wr->data_length; - return wr->format && wr->sample_rate; -} - -int wav_decoder_run(void* obj, unsigned char* data, unsigned int length) { - struct wav_decoder* wr = (struct wav_decoder*) obj; - int n; - if (wr->wav == NULL) - return -1; - if (length > wr->data_length) - length = wr->data_length; - n = fread(data, 1, length, wr->wav); - wr->data_length -= length; - return n; -} - -int wav_decoder_get_channel(void* obj) { - - struct wav_decoder* wr = (struct wav_decoder*) obj; - return wr->channels; -} - -int wav_decoder_get_sample_rate(void* obj) { - - struct wav_decoder* wr = (struct wav_decoder*) obj; - return wr->sample_rate; -} - -int wav_decoder_get_data_length(void* obj) { - - struct wav_decoder* wr = (struct wav_decoder*) obj; - return wr->data_length; -} \ No newline at end of file diff --git a/port/drivers/esp_tts_wav/wav_decoder.h b/port/drivers/esp_tts_wav/wav_decoder.h deleted file mode 100644 index 9e8c8dd2..00000000 --- a/port/drivers/esp_tts_wav/wav_decoder.h +++ /dev/null @@ -1,39 +0,0 @@ -/* ------------------------------------------------------------------ - * Copyright (C) 2009 Martin Storsjo - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. - * See the License for the specific language governing permissions - * and limitations under the License. - * ------------------------------------------------------------------- - */ - -#ifndef WAV_DECODER_H -#define WAV_DECODER_H - -#ifdef __cplusplus -extern "C" { -#endif - -void* wav_decoder_open(const char *filename); -void wav_decoder_close(void* obj); -int wav_decoder_get_header(void* obj, int* format, int* channels, int* sample_rate, int* bits_per_sample, unsigned int* data_length); -int wav_decoder_run(void* obj, unsigned char* data, unsigned int length); -int wav_decoder_get_sample_rate(void* obj); -int wav_decoder_get_channel(void* obj); -int wav_decoder_get_data_length(void* obj); - -#ifdef __cplusplus -} -#endif - -#endif - diff --git a/port/drivers/esp_tts_wav/wav_encoder.c b/port/drivers/esp_tts_wav/wav_encoder.c deleted file mode 100644 index aab8f550..00000000 --- a/port/drivers/esp_tts_wav/wav_encoder.c +++ /dev/null @@ -1,111 +0,0 @@ -/* ------------------------------------------------------------------ - * Copyright (C) 2009 Martin Storsjo - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. - * See the License for the specific language governing permissions - * and limitations under the License. - * ------------------------------------------------------------------- - */ - -#include "wav_encoder.h" -#include -#include -#include -#include - -struct wav_encoder { - FILE *wav; - int data_length; - - int sample_rate; - int bits_per_sample; - int channels; -}; - -static void write_string(struct wav_encoder* ww, const char *str) { - fputc(str[0], ww->wav); - fputc(str[1], ww->wav); - fputc(str[2], ww->wav); - fputc(str[3], ww->wav); -} - -static void write_int32(struct wav_encoder* ww, int value) { - fputc((value >> 0) & 0xff, ww->wav); - fputc((value >> 8) & 0xff, ww->wav); - fputc((value >> 16) & 0xff, ww->wav); - fputc((value >> 24) & 0xff, ww->wav); -} - -static void write_int16(struct wav_encoder* ww, int value) { - fputc((value >> 0) & 0xff, ww->wav); - fputc((value >> 8) & 0xff, ww->wav); -} - -static void write_header(struct wav_encoder* ww, int length) { - int bytes_per_frame, bytes_per_sec; - write_string(ww, "RIFF"); - write_int32(ww, 4 + 8 + 16 + 8 + length); - write_string(ww, "WAVE"); - - write_string(ww, "fmt "); - write_int32(ww, 16); - - bytes_per_frame = ww->bits_per_sample/8*ww->channels; - bytes_per_sec = bytes_per_frame*ww->sample_rate; - write_int16(ww, 1); // Format - write_int16(ww, ww->channels); // Channels - write_int32(ww, ww->sample_rate); // Samplerate - write_int32(ww, bytes_per_sec); // Bytes per sec - write_int16(ww, bytes_per_frame); // Bytes per frame - write_int16(ww, ww->bits_per_sample); // Bits per sample - - write_string(ww, "data"); - write_int32(ww, length); -} - -void* wav_encoder_open(const char *filename, int sample_rate, int bits_per_sample, int channels) { - struct wav_encoder* ww = (struct wav_encoder*) malloc(sizeof(*ww)); - memset(ww, 0, sizeof(*ww)); - ww->wav = fopen(filename, "wb"); - if (ww->wav == NULL) { - free(ww); - return NULL; - } - ww->data_length = 0; - ww->sample_rate = sample_rate; - ww->bits_per_sample = bits_per_sample; - ww->channels = channels; - - write_header(ww, ww->data_length); - return ww; -} - -void wav_encoder_close(void* obj) { - struct wav_encoder* ww = (struct wav_encoder*) obj; - if (ww->wav == NULL) { - free(ww); - return; - } - fseek(ww->wav, 0, SEEK_SET); - write_header(ww, ww->data_length); - fclose(ww->wav); - free(ww); -} - -void wav_encoder_run(void* obj, const unsigned char* data, int length) { - struct wav_encoder* ww = (struct wav_encoder*) obj; - if (ww->wav == NULL) - return; - fwrite(data, length, 1, ww->wav); - ww->data_length += length; -} - diff --git a/port/drivers/esp_tts_wav/wav_encoder.h b/port/drivers/esp_tts_wav/wav_encoder.h deleted file mode 100644 index 4fafd271..00000000 --- a/port/drivers/esp_tts_wav/wav_encoder.h +++ /dev/null @@ -1,35 +0,0 @@ -/* ------------------------------------------------------------------ - * Copyright (C) 2009 Martin Storsjo - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. - * See the License for the specific language governing permissions - * and limitations under the License. - * ------------------------------------------------------------------- - */ - -#ifndef WAV_ENCODER_H -#define WAV_ENCODER_H - -#ifdef __cplusplus -extern "C" { -#endif - -void* wav_encoder_open(const char *filename, int sample_rate, int bits_per_sample, int channels); -void wav_encoder_close(void* obj); -void wav_encoder_run(void* obj, const unsigned char* data, int length); - -#ifdef __cplusplus -} -#endif - -#endif - diff --git a/port/drivers/tts/tts_pcm.c b/port/drivers/tts/tts_pcm.c index 671530f7..0eec16f8 100644 --- a/port/drivers/tts/tts_pcm.c +++ b/port/drivers/tts/tts_pcm.c @@ -52,17 +52,19 @@ void tts_task(void* param) { if (esp_tts_parse_chinese(tts_handle, task_param->text)) { int len[1]={0}; + bsp_codec_dev_delete(); bsp_codec_dev_create(); - //bsp_audio_set_play_vol(50); - bsp_codec_dev_open(16000,1,256000); + bsp_audio_set_play_vol(50); + bsp_codec_dev_open(16000,2,32); do { - short* pcm = esp_tts_stream_play(tts_handle, len, 1); - bsp_audio_play(pcm, len[0] * 2, portMAX_DELAY); - printf("data:%d \n", len[0]); - vTaskDelay(pdMS_TO_TICKS(10)); + short* pcm = esp_tts_stream_play(tts_handle, len, 2); + bsp_audio_play2(pcm, len[0]*2, portMAX_DELAY); + // vTaskDelay(pdMS_TO_TICKS(10)); } while (len[0] > 0); + bsp_codec_dev_close(); + bsp_codec_dev_delete(); } - + esp_tts_stream_reset(tts_handle); xSemaphoreGive(task_param->done_semaphore); vTaskDelete(NULL);