Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions apps/cli/src/commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import type {
SessionMeta,
StoredMessage,
TaskManager,
VoiceStatus,
} from '@deepcode/core';
import {
contextWindowFor,
Expand Down Expand Up @@ -128,6 +129,17 @@ export interface SessionContext {
credsStore?: CredentialsStore;
/** User settings.json path (REPL-injected, honors --home) — backs /config set. */
userSettingsPath?: string;
/** Home dir override (REPL-injected from --home) — backs default-path lookups
* like /voice's `~/.deepcode/models/...` model probe. Defaults to os.homedir(). */
home?: string;
/**
* Interactive voice capture, wired by the REPL (it owns readline + the mic):
* record → press Enter to stop → transcribe → return the text + display lines.
* `transcript` is null on cancel / not-ready / error. Absent in headless mode.
*/
voiceCapture?: () => Promise<{ transcript: string | null; lines: string[] }>;
/** Set by /voice — the REPL pre-fills the next input line with this text. */
prefillInput?: string;
sessionId: string;
sessions: SessionManager;
usage: {
Expand Down Expand Up @@ -1169,6 +1181,74 @@ export const TasksCommand: SlashCommand = {
},
};

/**
 * Builds the /voice message for the case where local dictation is already
 * usable but no interactive capture is wired (non-interactive / headless use).
 *
 * @param status Detection result; its binary and model paths are echoed back.
 * @returns Display lines: a readiness banner, the two detected paths, a blank
 *   separator, and a hint to use /voice from the interactive REPL.
 */
export function voiceReadyLines(status: VoiceStatus): string[] {
  const banner =
    '🎙 Voice input is ready — whisper.cpp, fully local (no audio leaves your machine).';
  const detectedPaths = [` binary: ${status.binPath}`, ` model: ${status.modelPath}`];
  const usageHint =
    'Type /voice in the interactive REPL to dictate (record → Enter to stop → transcribe).';

  return [banner, ...detectedPaths, '', usageHint];
}

/**
 * Builds the setup / troubleshooting text for /voice from a detection result.
 *
 * The output is, in order: a headline (which differs depending on whether
 * voice input is already ready), a "Detected:" summary of the whisper binary
 * and model, an optional "Issues:" list (only when detection reported
 * problems), and the fixed install steps ending with a pointer to the guide.
 *
 * @param status Detection result to describe.
 * @returns Display lines, one entry per output line.
 */
export function voiceSetupLines(status: VoiceStatus): string[] {
  // Check mark when a path was detected, cross when it was not.
  const mark = (found: unknown): string => (found ? '✓' : '✗');

  const headline = status.ready
    ? '🎙 Voice input is ready. Setup reference below.'
    : '🎙 Voice input is not set up yet. Enable local dictation (whisper.cpp — no cloud):';

  const report: string[] = [
    headline,
    '',
    'Detected:',
    ` ${mark(status.binPath)} whisper binary ${status.binPath ?? '(not found)'}`,
    ` ${mark(status.modelPath)} model ${status.modelPath ?? '(not found)'}`,
  ];

  if (status.problems.length > 0) {
    report.push('', 'Issues:', ...status.problems.map((problem) => ` • ${problem}`));
  }

  // Static install instructions — identical regardless of the detection result.
  const installSteps: string[] = [
    '',
    'Setup:',
    ' 1. Install whisper.cpp',
    ' macOS: brew install whisper-cpp',
    ' Linux: build https://github.com/ggerganov/whisper.cpp, put `whisper` on PATH',
    ' 2. Download a model (base.en ≈ 140 MB is a good default) and save it:',
    ' mkdir -p ~/.deepcode/models',
    ' cp ggml-base.en.bin ~/.deepcode/models/whisper-base.en.bin',
    ' 3. Install a mic recorder (either): brew install ffmpeg · brew install sox',
    ' 4. (optional) Point DeepCode at custom paths in ~/.deepcode/settings.json:',
    ' { "voice": { "binPath": "/opt/homebrew/bin/whisper-cli",',
    ' "modelPath": "~/.deepcode/models/whisper-base.en.bin" } }',
    '',
    'Full guide: docs/VOICE_INPUT.md',
  ];

  return report.concat(installSteps);
}

/**
 * `/voice` — dictate through the REPL-provided capture callback, or report
 * readiness / setup steps when no callback is wired or `/voice setup` is used.
 */
export const VoiceCommand: SlashCommand = {
  name: '/voice',
  description:
    'Dictate via local whisper.cpp (record → Enter → transcribe); `/voice setup` for steps.',
  async run(args, ctx) {
    // `/voice setup` (case-insensitive) always shows the instructions.
    const wantsSetup = args[0]?.toLowerCase() === 'setup';

    if (!wantsSetup && ctx.voiceCapture) {
      // Interactive path: the callback records + transcribes. A non-empty
      // transcript is stashed on the context so the REPL can pre-fill the next
      // input line with it; the callback's lines are shown either way.
      const captured = await ctx.voiceCapture();
      if (captured.transcript) {
        ctx.prefillInput = captured.transcript;
      }
      return captured.lines;
    }

    // Headless or explicit setup request: run detection and describe the result.
    const core = await import('@deepcode/core');
    const status = await core.detectVoice(ctx.settings.voice, { home: ctx.home });
    const showSetup = wantsSetup || !status.ready;
    return showSetup ? voiceSetupLines(status) : voiceReadyLines(status);
  },
};

export const BackgroundCommand: SlashCommand = {
name: '/background',
aliases: ['/bg'],
Expand Down Expand Up @@ -1229,6 +1309,7 @@ export const BUILTIN_COMMANDS: SlashCommand[] = [
BtwCommand,
TasksCommand,
BackgroundCommand,
VoiceCommand,
];

// ──────────────────────────────────────────────────────────────────────────
Expand Down
20 changes: 19 additions & 1 deletion apps/cli/src/repl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ import {
import { createInterface } from 'node:readline/promises';
import type { Readable, Writable } from 'node:stream';
import { CommandRegistry, type SessionContext } from './commands.js';
import { captureVoice } from './voice-capture.js';
import { resolveEffort } from './parse-args.js';
import { TrustStore } from './trust.js';
import { resolveBuiltinSkillsDir } from './builtin-skills.js';
Expand Down Expand Up @@ -437,6 +438,7 @@ export async function startRepl(opts: ReplOpts): Promise<number> {
creds,
credsStore,
userSettingsPath: settingsPaths({ cwd, home: opts.home }).userPath,
home: opts.home,
sessionId: session.id,
sessions,
usage: { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, cacheReadTokens: 0 },
Expand All @@ -452,6 +454,9 @@ export async function startRepl(opts: ReplOpts): Promise<number> {
...(pluginsWire?.spawnFailures.map((n) => `${n}: failed to start`) ?? []),
],
initFlow: () => runInitFlow({ cwd, output, rl, provider, model, maxTokens, temperature }),
// M8: /voice records from the mic + transcribes via whisper.cpp, then the
// loop pre-fills the next input line with the transcript (rl is created below).
voiceCapture: () => captureVoice({ rl, output, settings, home: opts.home }),
// M7: /rewind needs access to history + provider.
provider,
history,
Expand Down Expand Up @@ -548,10 +553,19 @@ export async function startRepl(opts: ReplOpts): Promise<number> {
};
await fireLifecycle('SessionStart', { sessionId: session.id, source: 'cli' });

// Text to inject into the next prompt's input buffer (e.g. a /voice transcript
// the user can edit before submitting). Written right after the prompt renders.
let pendingPrefill: string | undefined;

while (true) {
let userInput: string;
try {
userInput = await rl.question('› ');
const question = rl.question('› ');
if (pendingPrefill !== undefined) {
rl.write(pendingPrefill);
pendingPrefill = undefined;
}
userInput = await question;
} catch {
break;
}
Expand Down Expand Up @@ -589,6 +603,10 @@ export async function startRepl(opts: ReplOpts): Promise<number> {
history = ctx.newHistory;
ctx.newHistory = undefined;
}
if (ctx.prefillInput) {
pendingPrefill = ctx.prefillInput;
ctx.prefillInput = undefined;
}
if (ctx.exitRequested) break;
continue;
}
Expand Down
108 changes: 108 additions & 0 deletions apps/cli/src/voice-capture.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Interactive voice capture for the REPL: detect whisper.cpp + a recorder,
// record from the mic until the user presses Enter, transcribe locally, and
// return the text so the REPL can pre-fill the input line. The audio file is
// written to $TMPDIR and deleted right after transcription (see VOICE_INPUT.md).

import { randomUUID } from 'node:crypto';
import { rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { Interface as ReadlineInterface } from 'node:readline/promises';
import type { Writable } from 'node:stream';
import {
detectRecorder,
detectVoice,
recordToWav,
WhisperCppProvider,
type DeepCodeSettings,
} from '@deepcode/core';
import { voiceSetupLines } from './commands.js';

/** Collaborators `captureVoice` needs from whoever hosts it (the REPL). */
export interface VoiceCaptureDeps {
/** Readline interface; `captureVoice` awaits `rl.question('')` on it to wait for the Enter that stops recording. */
rl: ReadlineInterface;
/** Stream the progress messages (recording started, transcribing) are written to. */
output: Writable;
/** Settings whose `voice` section is passed to detection and supplies `inputDevice` for the recorder. */
settings: DeepCodeSettings;
/** Home override (honors --home), for the default model-path probe. */
home?: string;
}

/** Outcome of one `captureVoice` run: the text (if any) plus what to show the user. */
export interface VoiceCaptureResult {
/** Transcribed text, or null on cancel / not-ready / empty / error. */
transcript: string | null;
/** Lines for the REPL to print (status, errors, or setup steps). */
lines: string[];
}

/** Extracts a printable message from a caught value, which may not be an Error. */
function describeFailure(cause: unknown): string {
  return cause instanceof Error ? cause.message : String(cause);
}

/**
 * Records from the microphone until the user presses Enter, transcribes the
 * audio locally with whisper.cpp, and returns the text for the REPL to pre-fill.
 *
 * Flow: detect whisper.cpp (setup lines if not ready) → detect a recorder →
 * record to a uniquely named WAV in the OS temp dir → stop on Enter →
 * transcribe → delete the WAV and whisper's `.txt` side-file.
 *
 * Never leaves the recorder running: the recording is aborted even when the
 * prompt itself fails (e.g. the readline interface is closed mid-recording),
 * and the temp files are removed on every path that created them.
 *
 * @param deps Readline interface, output stream, settings and optional home override.
 * @returns `transcript` is the recognized text collapsed onto a single line, or
 *   null on cancel / not-ready / no speech / error; `lines` is what to print.
 */
export async function captureVoice(deps: VoiceCaptureDeps): Promise<VoiceCaptureResult> {
  const { rl, output, settings, home } = deps;

  const status = await detectVoice(settings.voice, { home });
  // Guarding modelPath here replaces a non-null assertion further down.
  if (!status.ready || !status.modelPath) {
    return { transcript: null, lines: voiceSetupLines(status) };
  }
  const modelPath = status.modelPath;

  const rec = await detectRecorder();
  if (!rec.found || !rec.bin || !rec.binPath) {
    return {
      transcript: null,
      lines: [
        '🎙 whisper.cpp is ready, but no microphone recorder was found.',
        ` • ${rec.problems[0] ?? 'no recorder on PATH'}`,
        ' Install one: brew install ffmpeg · brew install sox',
      ],
    };
  }

  const wav = join(tmpdir(), `deepcode-voice-${randomUUID()}.wav`);
  const cleanup = async (): Promise<void> => {
    await rm(wav, { force: true });
    await rm(`${wav}.txt`, { force: true }); // whisper --output-txt side-file
  };

  // Record until the user presses Enter (abort → SIGINT → recorder flushes WAV).
  // The wrapper never rejects: it resolves to the failure message, or to
  // undefined when the recorder finished cleanly.
  const ac = new AbortController();
  const recording = (async (): Promise<string | undefined> => {
    try {
      await recordToWav({
        outPath: wav,
        bin: rec.bin,
        binPath: rec.binPath,
        signal: ac.signal,
        device: settings.voice?.inputDevice,
      });
      return undefined;
    } catch (e: unknown) {
      return describeFailure(e);
    }
  })();

  try {
    output.write(` 🎙 Recording with ${rec.bin}… press Enter to stop.\n`);

    let cancelled = false;
    try {
      await rl.question('');
    } catch {
      // The prompt was torn down (interface closed / aborted) instead of
      // answered — treat it as a cancel rather than leaking the recorder.
      cancelled = true;
    } finally {
      ac.abort();
    }
    const recordingFailure = await recording;

    if (cancelled) {
      return { transcript: null, lines: [' (Recording cancelled — nothing inserted.)'] };
    }
    if (recordingFailure !== undefined) {
      return {
        transcript: null,
        lines: [` ⚠ Recording failed: ${recordingFailure}`, ' Run `/voice setup` for help.'],
      };
    }

    try {
      output.write(' … transcribing locally\n');
      const provider = new WhisperCppProvider({
        binPath: status.binPath,
        modelPath,
      });
      const { text } = await provider.transcribe(wav);
      // Collapse all whitespace runs (including newlines) to single spaces: the
      // transcript is injected into a single readline input line, where an
      // embedded newline would be treated as Enter and submit it prematurely.
      const transcript = text.replace(/\s+/g, ' ').trim();
      if (!transcript) {
        return { transcript: null, lines: [' (No speech detected — nothing inserted.)'] };
      }
      return {
        transcript,
        lines: [
          ` 🎙 Transcribed (${transcript.length} chars) — review the input line, edit, then press Enter.`,
        ],
      };
    } catch (e: unknown) {
      return { transcript: null, lines: [` ⚠ Transcription failed: ${describeFailure(e)}`] };
    }
  } finally {
    // Make sure the recorder is stopped and settled before deleting its output,
    // even if something above threw before the normal stop sequence ran.
    ac.abort();
    await recording;
    await cleanup();
  }
}
118 changes: 118 additions & 0 deletions apps/cli/src/voice-cmd.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Tests for the /voice slash command messaging. Detection logic itself is
// unit-tested in core (voice/detect.test.ts); here we drive the command end to
// end with real temp files so the "ready" path is deterministic, and bogus
// configured paths so the "not set up" path never depends on the host's PATH.

import { afterEach, describe, expect, it } from 'vitest';
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { SessionManager } from '@deepcode/core';
import { CommandRegistry, type SessionContext } from './commands.js';

const reg = new CommandRegistry();

/** Temp directories created during the current test; drained and removed in afterEach. */
const scratchDirs: string[] = [];

/** Creates a fresh `dc-voice-*` directory under the OS temp dir and tracks it for removal. */
async function tmpDir(): Promise<string> {
  const prefix = join(tmpdir(), 'dc-voice-');
  const created = await mkdtemp(prefix);
  scratchDirs.push(created);
  return created;
}

afterEach(async () => {
  // splice(0) empties the tracking list, so each directory is removed exactly once.
  const doomed = scratchDirs.splice(0);
  const removals = doomed.map((dir) => rm(dir, { recursive: true, force: true }));
  await Promise.all(removals);
});

/**
 * Builds a minimal SessionContext for driving the command in tests.
 *
 * @param overrides Fields that replace the defaults (applied last, so they win).
 * @returns A new context with a fresh SessionManager on every call.
 */
function ctx(overrides: Partial<SessionContext> = {}): SessionContext {
  const defaults: SessionContext = {
    cwd: '/tmp/x',
    model: 'deepseek-chat',
    mode: 'default',
    effort: 'medium',
    settings: {},
    creds: { apiKey: 'sk-test' },
    sessionId: 's1',
    sessions: new SessionManager({ root: '/tmp/x' }),
    usage: { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, cacheReadTokens: 0 },
  };
  return { ...defaults, ...overrides };
}

/** Resolves the registered `/voice` command and runs it with the given args and context. */
function run(args: string[], c: SessionContext) {
  const matched = reg.match('/voice');
  // The command is a builtin, so the lookup is expected to succeed.
  return matched!.cmd.run(args, c);
}

describe('/voice', () => {
  /** Writes stand-in binary + model files into a fresh temp dir and returns their paths. */
  const fakeInstall = async (
    binContents: string,
    modelContents: string,
  ): Promise<{ binPath: string; modelPath: string }> => {
    const root = await tmpDir();
    const binPath = join(root, 'whisper-cli');
    const modelPath = join(root, 'model.bin');
    await writeFile(binPath, binContents);
    await writeFile(modelPath, modelContents);
    return { binPath, modelPath };
  };

  /** Runs /voice and joins the returned lines into one string for matching. */
  const render = async (args: string[], c: SessionContext): Promise<string> => {
    const lines = await run(args, c);
    return lines.join('\n');
  };

  it('reports ready when configured binary + model both exist', async () => {
    const { binPath, modelPath } = await fakeInstall('#!/bin/sh\n', 'GGML');
    // No voiceCapture wired (headless / non-interactive) → report readiness.
    const text = await render([], ctx({ settings: { voice: { binPath, modelPath } } }));
    expect(text).toMatch(/ready/i);
    expect(text).toContain(binPath);
    expect(text).toContain(modelPath);
    expect(text).toMatch(/type \/voice/i);
  });

  it('prints setup steps + issues when configured paths are missing', async () => {
    const missing = { binPath: '/no/such/whisper', modelPath: '/no/such/m.bin' };
    const text = await render([], ctx({ settings: { voice: missing } }));
    expect(text).toMatch(/not set up yet/i);
    expect(text).toMatch(/brew install whisper-cpp/);
    expect(text).toMatch(/docs\/VOICE_INPUT\.md/);
    // The specific configured-but-missing problems surface under "Issues:".
    expect(text).toMatch(/Issues:/);
    expect(text).toContain('Configured voice.binPath not found: /no/such/whisper');
    expect(text).toContain('Configured voice.modelPath not found: /no/such/m.bin');
  });

  it('`/voice setup` always shows install steps, even when ready', async () => {
    const { binPath, modelPath } = await fakeInstall('', '');
    const text = await render(['setup'], ctx({ settings: { voice: { binPath, modelPath } } }));
    expect(text).toMatch(/Setup:/);
    expect(text).toMatch(/brew install whisper-cpp/);
    // Still acknowledges it's already ready.
    expect(text).toMatch(/ready/i);
  });

  it('runs the wired capture callback and pre-fills the transcript', async () => {
    const session = ctx({
      voiceCapture: async () => ({ transcript: 'refactor the parser', lines: ['🎙 Transcribed'] }),
    });
    const text = await render([], session);
    expect(text).toContain('Transcribed');
    expect(session.prefillInput).toBe('refactor the parser'); // REPL will inject this
  });

  it('does not pre-fill when capture is cancelled / empty', async () => {
    const session = ctx({
      voiceCapture: async () => ({ transcript: null, lines: ['(No speech detected)'] }),
    });
    const text = await render([], session);
    expect(text).toMatch(/no speech/i);
    expect(session.prefillInput).toBeUndefined();
  });

  it('`/voice setup` bypasses capture even when a callback is wired', async () => {
    let captureCalls = 0;
    const session = ctx({
      settings: { voice: { binPath: '/no/such', modelPath: '/no/such' } },
      voiceCapture: async () => {
        captureCalls += 1;
        return { transcript: 'x', lines: [] };
      },
    });
    const text = await render(['setup'], session);
    expect(captureCalls).toBe(0);
    expect(text).toMatch(/Setup:/);
  });
});
Loading
Loading