From 7171f7921970a87bdb5d8704866c57a6acd56418 Mon Sep 17 00:00:00 2001 From: Harshdeep Singh Date: Fri, 12 Jun 2026 15:50:42 +0530 Subject: [PATCH] test(e2e): add Gemini TTS coverage to provider matrix Wire Gemini TTS into the existing E2E test infrastructure. - Add Gemini TTS adapter factory in media-providers - Add Gemini generateContent audio mock - Add Gemini to TTS feature support - Exercise PCM-to-WAV normalization using raw PCM audio responses This extends existing TTS E2E coverage without changing runtime behavior or public APIs. --- testing/e2e/global-setup.ts | 67 ++++++++++++++++++++++++++ testing/e2e/src/lib/feature-support.ts | 2 +- testing/e2e/src/lib/media-providers.ts | 10 +++- 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/testing/e2e/global-setup.ts b/testing/e2e/global-setup.ts index f869df01a..5f1bd7ca9 100644 --- a/testing/e2e/global-setup.ts +++ b/testing/e2e/global-setup.ts @@ -43,6 +43,19 @@ export default async function globalSetup() { mock.mount('/v1/text-to-speech', elevenLabsTTSMount()) mock.mount('/v1/speech-to-text', elevenLabsSTTMount()) + // Gemini TTS hits the standard Gemini generateContent endpoint + // (POST /v1beta/models/{model}:generateContent) with + // responseModalities: ['AUDIO']. aimock's native Gemini audio helper derives + // the mime type from the fixture's `format`/`contentType`, so it can't emit + // the raw `audio/L16;codec=pcm;rate=24000` PCM that real Gemini TTS returns. + // Mount the TTS model's generateContent path directly so we can hand back + // PCM and exercise the adapter's PCM→WAV normalization. The path is specific + // to the TTS model, so it doesn't intercept Gemini chat/summarize requests. + mock.mount( + '/v1beta/models/gemini-3.1-flash-tts-preview:generateContent', + geminiTTSMount(), + ) + // Anthropic server_tool_use bug reproduction (issue #604). aimock can't // natively synthesize `server_tool_use` / `web_fetch_tool_result` content // blocks, so this mount hand-crafts the raw SSE Claude would emit when a @@ -107,6 +120,14 @@ const FAKE_MP3_BYTES = Buffer.from([ 0xff, 0xfb, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ]) +/** + * Raw 16-bit little-endian PCM bytes. Gemini TTS returns audio as + * `audio/L16;codec=pcm;rate=24000` inlineData, which the adapter wraps in a + * RIFF/WAV header before handing it to the browser. The samples are arbitrary + * silence — the spec only asserts the `