diff --git a/port/boards/labplus_for_xuejing/bsp_audio_board.c b/port/boards/labplus_for_xuejing/bsp_audio_board.c index 67dc2371..71d92039 100644 --- a/port/boards/labplus_for_xuejing/bsp_audio_board.c +++ b/port/boards/labplus_for_xuejing/bsp_audio_board.c @@ -53,6 +53,11 @@ static const audio_codec_ctrl_if_t *codec_ctrl_if = NULL; static const audio_codec_if_t *codec_if = NULL; static esp_codec_dev_handle_t codec_dev = NULL; +static int s_play_sample_rate = 16000; +static int s_play_channel_format = 1; +static int s_bits_per_chan = 16; + + static esp_err_t bsp_i2s_init(uint32_t sample_rate, int channel_format, int bits_per_sample) { esp_err_t ret_val = ESP_OK; @@ -274,6 +279,63 @@ esp_err_t bsp_audio_play(const int8_t *data, int length, TickType_t ticks_to_wa return ret; } +esp_err_t bsp_audio_play2(const int16_t* data, int length, TickType_t ticks_to_wait) +{ + size_t bytes_write = 0; + esp_err_t ret = ESP_OK; + if (!codec_dev) { + return ESP_FAIL; + } + + int out_length= length; + int audio_time = 1; + audio_time *= (16000 / s_play_sample_rate); + audio_time *= (2 / s_play_channel_format); + + int *data_out = NULL; + if (s_bits_per_chan != 32) { + out_length = length * 2; + data_out = malloc(out_length); + for (int i = 0; i < length / sizeof(int16_t); i++) { + int ret = data[i]; + data_out[i] = ret << 16; + } + } + + int *data_out_1 = NULL; + if (s_play_channel_format != 2 || s_play_sample_rate != 16000) { + out_length *= audio_time; + data_out_1 = malloc(out_length); + int *tmp_data = NULL; + if (data_out != NULL) { + tmp_data = data_out; + } else { + tmp_data = data; + } + + for (int i = 0; i < out_length / (audio_time * sizeof(int)); i++) { + for (int j = 0; j < audio_time; j++) { + data_out_1[audio_time * i + j] = tmp_data[i]; + } + } + if (data_out != NULL) { + free(data_out); + data_out = NULL; + } + } + + if (data_out != NULL) { + ret = esp_codec_dev_write(codec_dev, (void *)data_out, out_length); + free(data_out); + } else if (data_out_1 != NULL) { + 
ret = esp_codec_dev_write(codec_dev, (void *)data_out_1, out_length); + free(data_out_1); + } else { + ret = esp_codec_dev_write(codec_dev, (void *)data, length); + } + + return ret; +} /* 做为afe is_get_raw_channel = true :16K 16bit 左声道 + 右声道,无ref, is_get_raw_channel = false :16K 16bit 左声道 + 右声道 + 一路ref,*/ diff --git a/port/boards/labplus_for_xuejing/mpconfigboard.cmake b/port/boards/labplus_for_xuejing/mpconfigboard.cmake index a05e46c9..3f50d884 100644 --- a/port/boards/labplus_for_xuejing/mpconfigboard.cmake +++ b/port/boards/labplus_for_xuejing/mpconfigboard.cmake @@ -42,6 +42,7 @@ set(MICROPY_SOURCE_BOARD ${MPY_PORT_DIR}/drivers/startup/oled.c ${MPY_PORT_DIR}/drivers/startup/startup.c ${MPY_PORT_DIR}/drivers/rfid/mfrc522.c + ${MPY_PORT_DIR}/drivers/tts/tts_pcm.c ${MPY_PORT_DIR}/lib/utils/pyexec.c ${MPY_PORT_DIR}/builtins/modmusictunes.c ${MPY_PORT_DIR}/builtins/modmusic.c @@ -51,6 +52,7 @@ set(MICROPY_SOURCE_BOARD ${MPY_PORT_DIR}/builtins/modframebuf.c ${MPY_PORT_DIR}/builtins/modaudio.c ${MPY_PORT_DIR}/builtins/modrfid.c + ${MPY_PORT_DIR}/builtins/modtest.c # ${MPY_PORT_DIR}/builtins/mod_audio/audio_player.c # ${MPY_PORT_DIR}/builtins/mod_audio/audio_recorder.c # ${MPY_PORT_DIR}/builtins/mod_audio/vfs_stream.c @@ -60,6 +62,7 @@ set(MICROPY_SOURCE_BOARD set(MICROPY_SOURCE_BOARD_DIR ${MPY_PORT_DIR}/drivers ${MPY_PORT_DIR}/drivers/audio/include + ${MPY_PORT_DIR}/drivers/tts ${MPY_PORT_DIR}/lib ${MPY_PORT_DIR}/builtins ${MPY_PORT_DIR}/boards diff --git a/port/boards/labplus_for_xuejing/partitions-8MiB.csv b/port/boards/labplus_for_xuejing/partitions-8MiB.csv index 3c955e12..4a8942d4 100644 --- a/port/boards/labplus_for_xuejing/partitions-8MiB.csv +++ b/port/boards/labplus_for_xuejing/partitions-8MiB.csv @@ -1,10 +1,9 @@ # Notes: the offset of the partition table itself is set in # $IDF_PATH/components/partition_table/Kconfig.projbuild. 
-# Name, Type, SubType, Offset, Size, Flags -nvs, data, nvs, 0x9000, 0x6000, -phy_init, data, phy, 0xf000, 0x1000, -factory, app, factory, 0x10000, 0x200000, -vfs, data, spiffs, 0x210000, 0x1f0000, -font, data, nvs, 0x400000, 0x200000, -user_nvs, data, nvs, 0x7fc000, 0x4000, - +# Name, Type, SubType, Offset, Size, Flags +nvs, data, nvs, 0x9000, 0x6000, +phy_init, data, phy, 0xf000, 0x1000, +factory, app, factory, 0x10000, 0x2f0000, +vfs, data, fat, 0x300000, 0x100000, +font, data, fat, 0x400000, 0x3fc000, +user_nvs, data, nvs, 0x7fc000, 0x4000, diff --git a/port/builtins/modaudio.c b/port/builtins/modaudio.c index 8be6a369..61be6b44 100755 --- a/port/builtins/modaudio.c +++ b/port/builtins/modaudio.c @@ -135,6 +135,7 @@ static MP_DEFINE_CONST_FUN_OBJ_0(audio_loudness_obj, audio_loudness); static mp_obj_t audio_record(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) { + audio_init(); enum { ARG_file_name, ARG_record_time, ARG_bits_per_sample, ARG_channels, ARG_sampleRate}; static const mp_arg_t allowed_args[] = { { MP_QSTR_file_name, MP_ARG_REQUIRED | MP_ARG_OBJ }, @@ -193,4 +194,4 @@ const mp_obj_module_t mp_module_audio = { }; MP_REGISTER_MODULE(MP_QSTR_audio, mp_module_audio); -// #endif //#if MICROPY_PY_AUDIO \ No newline at end of file +// #endif //#if MICROPY_PY_AUDIO diff --git a/port/builtins/modtest.c b/port/builtins/modtest.c new file mode 100644 index 00000000..aaf41bb8 --- /dev/null +++ b/port/builtins/modtest.c @@ -0,0 +1,39 @@ +#include "py/obj.h" +#include "py/runtime.h" +#include "esp_tts.h" +#include "tts_pcm.h" + +#ifndef STATIC +#define STATIC static +#endif + +// MicroPython 绑定函数 +STATIC mp_obj_t mp_tts_generate(mp_obj_t text_obj) { + // 获取文本参数 + const char* text = mp_obj_str_get_str(text_obj); + + // 直接调用 text_to_speech 播放音频 + text_to_speech(text); + + return mp_const_none; // 播放完成后返回 None +} + +STATIC MP_DEFINE_CONST_FUN_OBJ_1(mp_tts_generate_obj, mp_tts_generate); + + +// 模块定义 +STATIC const mp_rom_map_elem_t 
tts_module_globals_table[] = { + { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_tts) }, + { MP_ROM_QSTR(MP_QSTR_generate), MP_ROM_PTR(&mp_tts_generate_obj) }, +}; +STATIC MP_DEFINE_CONST_DICT(tts_module_globals, tts_module_globals_table); + +// 模块结构体 +const mp_obj_module_t tts_module = { + .base = { &mp_type_module }, + .globals = (mp_obj_dict_t*)&tts_module_globals, +}; + +// 注册模块 +MP_REGISTER_MODULE(MP_QSTR_tts, tts_module); + diff --git a/port/builtins/modtest.c.bk b/port/builtins/modtest.c.bk new file mode 100644 index 00000000..7d0c7b84 --- /dev/null +++ b/port/builtins/modtest.c.bk @@ -0,0 +1,44 @@ +#include "py/obj.h" +#include "py/runtime.h" +#include "esp_tts.h" +#include "tts_pcm.h" + +#ifndef STATIC +#define STATIC static +#endif + +// MicroPython绑定函数 +STATIC mp_obj_t mp_tts_generate(mp_obj_t text_obj) { + // 获取参数 + const char* text = mp_obj_str_get_str(text_obj); + + // 生成 PCM 数据 + int pcm_len = 0; + short* pcm_data = text_to_pcm(text, &pcm_len); + + if (!pcm_data || pcm_len == 0) { + return mp_const_none; // 失败返回 None + } + + // 返回 PCM 数据为字节数组 + return mp_obj_new_bytearray_by_ref(pcm_len * sizeof(short), pcm_data); +} + +STATIC MP_DEFINE_CONST_FUN_OBJ_1(mp_tts_generate_obj, mp_tts_generate); + +// 模块定义 +STATIC const mp_rom_map_elem_t tts_module_globals_table[] = { + { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_tts) }, + { MP_ROM_QSTR(MP_QSTR_generate), MP_ROM_PTR(&mp_tts_generate_obj) }, +}; +STATIC MP_DEFINE_CONST_DICT(tts_module_globals, tts_module_globals_table); + +// 模块结构体 +const mp_obj_module_t tts_module = { + .base = { &mp_type_module }, + .globals = (mp_obj_dict_t*)&tts_module_globals, +}; + +// 注册模块 +MP_REGISTER_MODULE(MP_QSTR_tts, tts_module); + diff --git a/port/drivers/audio/include/bsp_audio.h b/port/drivers/audio/include/bsp_audio.h index 916c72af..5483936a 100644 --- a/port/drivers/audio/include/bsp_audio.h +++ b/port/drivers/audio/include/bsp_audio.h @@ -67,6 +67,7 @@ esp_err_t bsp_codec_dev_open(int sample_rate, int 
channel_format, int bits_per_s esp_err_t bsp_codec_dev_close(void); esp_err_t bsp_audio_play(const int8_t *data, int length, TickType_t ticks_to_wait); +esp_err_t bsp_audio_play2(const int16_t *data, int length, TickType_t ticks_to_wait); /** * @brief Get the record pcm data. diff --git a/port/drivers/audio/include/recorder.h b/port/drivers/audio/include/recorder.h index 4eb59c52..024398dc 100644 --- a/port/drivers/audio/include/recorder.h +++ b/port/drivers/audio/include/recorder.h @@ -1,6 +1,7 @@ #ifndef __RECORDER_ #define __RECORDER_ #include "freertos/FreeRTOS.h" +#include "freertos/semphr.h" #include "freertos/event_groups.h" #include "ringbuf.h" #include "py/obj.h" @@ -27,5 +28,6 @@ typedef struct void recorder_record(const char *filename, wav_fmt_t fmt, int time); extern recorder_handle_t *recorder; +extern SemaphoreHandle_t xBinarySemaphore; #endif diff --git a/port/drivers/audio/include/wav_codec.h b/port/drivers/audio/include/wav_codec.h index 72ef9281..fc77cdef 100644 --- a/port/drivers/audio/include/wav_codec.h +++ b/port/drivers/audio/include/wav_codec.h @@ -23,6 +23,9 @@ #include "py/obj.h" #include "freertos/ringbuf.h" #include "wave_head.h" +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" + #ifdef __cplusplus extern "C" { diff --git a/port/drivers/audio/recorder.c b/port/drivers/audio/recorder.c index 1704bbe7..60002434 100755 --- a/port/drivers/audio/recorder.c +++ b/port/drivers/audio/recorder.c @@ -11,6 +11,8 @@ #include #include #include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/semphr.h" #include "esp_system.h" #include "esp_log.h" #include "py/stream.h" @@ -23,6 +25,7 @@ #include "player.h" #include "msg.h" + // #define TAG "recorder" recorder_handle_t *recorder = NULL; @@ -65,7 +68,10 @@ void recorder_record(const char *filename, wav_fmt_t fmt, int time) recorder->total_frames = recorder->time * (recorder->wav_fmt.sampleRate * recorder->wav_fmt.channels * recorder->wav_fmt.bits_per_sample / 8) / 
READ_RINGBUF_BLOCK_SIZE; // ESP_LOGE(TAG, "record total frame: %d, time: %d", recorder->total_frames, recorder->time); recorder->file_uri = filename; + xBinarySemaphore = xSemaphoreCreateBinary(); xTaskCreatePinnedToCore(&stream_i2s_read_task, "stream_i2s_read_task", 4 * 1024, (void*)recorder, 8, NULL, CORE_NUM1); + xSemaphoreTake(xBinarySemaphore, portMAX_DELAY); + vSemaphoreDelete(xBinarySemaphore); } void record_deinit(void) diff --git a/port/drivers/audio/wav_codec.c b/port/drivers/audio/wav_codec.c index bf883f16..6a919be6 100644 --- a/port/drivers/audio/wav_codec.c +++ b/port/drivers/audio/wav_codec.c @@ -19,6 +19,7 @@ #include "esp_log.h" #include "freertos/FreeRTOS.h" #include "freertos/task.h" +#include "freertos/semphr.h" #include #include #include @@ -29,6 +30,8 @@ #include "player.h" #include "recorder.h" +SemaphoreHandle_t xBinarySemaphore; // 定义信号量 + static const char *TAG = "wav_codec"; wav_codec_t *wav_file_open(const char *path_in, int mode) @@ -99,7 +102,7 @@ void wav_file_read_task(void *arg) if(!wav_codec){ break; } player->wav_codec = wav_codec; - if(!wav_codec_ringbuff){ + if(!wav_codec_ringbuff){/ wav_codec_ringbuff = xRingbufferCreate(STREAM_OUT_RINGBUF_SIZE, RINGBUF_TYPE_BYTEBUF); if(!wav_codec_ringbuff){ break; } }else{ @@ -291,5 +294,6 @@ void stream_i2s_read_task(void *arg) free(stream_buff); } // ESP_LOGE(TAG, "stream in task end, RAM left: %ld", esp_get_free_heap_size()); + xSemaphoreGive(xBinarySemaphore); vTaskDelete(NULL); } diff --git a/port/drivers/tts/tts_pcm.c b/port/drivers/tts/tts_pcm.c new file mode 100644 index 00000000..0eec16f8 --- /dev/null +++ b/port/drivers/tts/tts_pcm.c @@ -0,0 +1,92 @@ +#include +#include +#include + +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "esp_system.h" +#include "freertos/semphr.h" +#include "esp_tts.h" +#include "esp_tts_voice_xiaole.h" +#include "esp_tts_voice_template.h" +#include "esp_tts_player.h" +#include "ringbuf.h" + +#include "esp_partition.h" +#include 
"esp_idf_version.h" +#include "bsp_audio.h" + +typedef struct { + char text[256]; + SemaphoreHandle_t done_semaphore; +} tts_task_param_t; + +void tts_task(void* param) { + tts_task_param_t* task_param = (tts_task_param_t*)param; + const esp_partition_t* part = esp_partition_find_first(ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, "font"); + if (part == NULL) { + printf("Couldn't find voice data partition!\n"); + vTaskDelete(NULL); + return; + } + + const void* voicedata; + esp_partition_mmap_handle_t mmap; + esp_err_t err = esp_partition_mmap(part, 0, part->size, ESP_PARTITION_MMAP_DATA, (const void**)&voicedata, &mmap); + if (err != ESP_OK) { + printf("Couldn't map voice data partition!\n"); + vTaskDelete(NULL); + return; + } + + esp_tts_voice_t* voice = esp_tts_voice_set_init(&esp_tts_voice_template, (int16_t*)voicedata); + if (voice == NULL) { + printf("TTS voice init failed!\n"); + return; + } + esp_tts_handle_t* tts_handle = esp_tts_create(voice); + if (!tts_handle || !task_param->text) { + printf("Invalid TTS handle or text!\n"); + return; + } + + if (esp_tts_parse_chinese(tts_handle, task_param->text)) { + int len[1]={0}; + bsp_codec_dev_delete(); + bsp_codec_dev_create(); + bsp_audio_set_play_vol(50); + bsp_codec_dev_open(16000,2,32); + do { + short* pcm = esp_tts_stream_play(tts_handle, len, 2); + bsp_audio_play2(pcm, len[0]*2, portMAX_DELAY); + // vTaskDelay(pdMS_TO_TICKS(10)); + } while (len[0] > 0); + bsp_codec_dev_close(); + bsp_codec_dev_delete(); + } + + esp_tts_stream_reset(tts_handle); + xSemaphoreGive(task_param->done_semaphore); + vTaskDelete(NULL); +} + +void text_to_speech(const char* text) { + static SemaphoreHandle_t done_semaphore = NULL; + if (done_semaphore == NULL) { + done_semaphore = xSemaphoreCreateBinary(); + } + + tts_task_param_t* task_param = malloc(sizeof(tts_task_param_t)); + if (!task_param) { + printf("Failed to allocate memory for task parameters\n"); + return; + } + + strncpy(task_param->text, text, 
sizeof(task_param->text) - 1); + task_param->text[sizeof(task_param->text) - 1] = '\0'; + task_param->done_semaphore = done_semaphore; + + xTaskCreatePinnedToCore(tts_task, "tts_task", 8192, task_param, 5, NULL, 0); + xSemaphoreTake(done_semaphore, portMAX_DELAY); + free(task_param); +} diff --git a/port/drivers/tts/tts_pcm.c.bk b/port/drivers/tts/tts_pcm.c.bk new file mode 100644 index 00000000..c7e2538a --- /dev/null +++ b/port/drivers/tts/tts_pcm.c.bk @@ -0,0 +1,116 @@ +#include +#include +#include + +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "esp_system.h" +#include "freertos/semphr.h" +#include "esp_tts.h" +#include "esp_tts_voice_xiaole.h" +#include "esp_tts_voice_template.h" +#include "esp_tts_player.h" +#include "ringbuf.h" + +#include "esp_partition.h" +#include "esp_idf_version.h" + +typedef struct { + char text[256]; // 存储待转换的文本 + short* pcm_data; // 存储 PCM 数据 + SemaphoreHandle_t done_semaphore; // 任务完成信号量 +} tts_task_param_t; + +static short* pcm_buffer = NULL; +static int pcm_data_len = 0; + +void tts_task(void* param) { + tts_task_param_t* task_param = (tts_task_param_t*)param; + + const esp_partition_t* part = esp_partition_find_first(ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, "font"); + if (part == NULL) { + printf("Couldn't find voice data partition!\n"); + vTaskDelete(NULL); + return; + } + + const void* voicedata; +#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0) + esp_partition_mmap_handle_t mmap; + esp_err_t err = esp_partition_mmap(part, 0, part->size, ESP_PARTITION_MMAP_DATA, (const void**)&voicedata, &mmap); +#else + spi_flash_mmap_handle_t mmap; + esp_err_t err = esp_partition_mmap(part, 0, part->size, SPI_FLASH_MMAP_DATA, &voicedata, &mmap); +#endif + if (err != ESP_OK) { + printf("Couldn't map voice data partition!\n"); + vTaskDelete(NULL); + return; + } + + esp_tts_voice_t* voice = esp_tts_voice_set_init(&esp_tts_voice_template, (int16_t*)voicedata); + if (voice == NULL) { + printf("TTS voice init 
#pragma once
#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief Synthesize @p text with ESP-TTS and play it on the audio codec.
 *
 * Blocks the caller until the TTS worker task signals completion.
 * The function produces no result: the definition in tts_pcm.c returns void,
 * so the prototype must say so as well.
 *
 * @param text NUL-terminated text to speak.
 */
void text_to_speech(const char *text);

#ifdef __cplusplus
}
#endif
--git a/port/drivers/tts/tts_pcm.h.bk b/port/drivers/tts/tts_pcm.h.bk new file mode 100644 index 00000000..ffc29e7e --- /dev/null +++ b/port/drivers/tts/tts_pcm.h.bk @@ -0,0 +1,9 @@ +#pragma once +#ifdef __cplusplus +extern "C" { +#endif +short* text_to_pcm(const char* text, int* out_len); +#ifdef __cplusplus +} +#endif + diff --git a/port/makeimg.py b/port/makeimg.py index 3d8e0a84..5e2b9bd8 100755 --- a/port/makeimg.py +++ b/port/makeimg.py @@ -104,7 +104,8 @@ def load_partition_table(filename): arg_application_bin = "micropython.bin" arg_output_bin = bin_name arg_output_uf2= uf2_name -arg_font_bin = "../Noto_Sans_CJK_SC_Light16.bin" +# arg_font_bin = "../Noto_Sans_CJK_SC_Light16.bin" +arg_font_bin = "../components/esp-sr/esp-tts/esp_tts_chinese/esp_tts_voice_data_xiaoxin.dat" # Load required sdkconfig values. idf_target = load_sdkconfig_str_value(arg_sdkconfig, "IDF_TARGET", "").upper()