// Auto Model Routing (issue #48)
//
// Classifies each task in an agent workflow by complexity and routes it to the
// most appropriate model: lightweight tasks (summarize/extract/classify/
// transform) go to a cheap model, while reasoning-intensive tasks (search,
// planning, decision-making, tool orchestration, complex problem solving)
// stay on the configured reasoning model. Explicit per-step / per-skill model
// settings always win, and anything unresolved falls back to the primary model.
//
// The routing block originates from user-edited YAML, so every value read from
// it is validated at runtime instead of trusting the static types.

/** Complexity tier a task can be classified into. */
export type ModelTier = "lightweight" | "reasoning";

/** Shape of the `model.routing` block in agent.yaml. */
export interface RoutingConfig {
  /** Master switch. Defaults to true when a routing block is present. */
  enabled?: boolean;
  /** Concrete model id for lightweight tasks, e.g. "openai:gpt-4o-mini". */
  lightweight?: string;
  /** Concrete model id for reasoning tasks, e.g. "openai:gpt-4o". */
  reasoning?: string;
  /** User overrides for classification — first matching rule wins. */
  rules?: Array<{ tier: ModelTier; match: string[] }>;
}

/** Everything `resolveRoutedModel` needs to pick a model for one task. */
export interface RouteInput {
  /** Explicit per-step model (highest priority). May be an alias or model id. */
  stepModel?: string;
  /** Per-skill default model from SKILL.md frontmatter. May be an alias or id. */
  skillModel?: string;
  /** Text used to classify the task (typically skill name + step prompt). */
  classifyText: string;
  /** Routing configuration from agent.yaml (model.routing). */
  routing?: RoutingConfig;
  /** The agent's primary/preferred model — the ultimate fallback. */
  primaryModel?: string;
}

/** Outcome of a routing decision. */
export interface RouteResult {
  /** Resolved concrete "provider:model" string (undefined → let runtime decide). */
  model?: string;
  /** The complexity tier, when the model came from automatic classification. */
  tier: ModelTier | null;
  /** Where the decision came from. */
  source: "step" | "skill" | "auto" | "fallback";
}

// Default task-to-tier keyword framework, derived directly from the issue's
// recommended task-type table. Matched against word starts so "summarize",
// "summary" and "summarization" all hit "summ", without false positives like
// "already" matching "read".
const DEFAULT_LIGHTWEIGHT = [
  "summ", "extract", "classif", "transform", "format", "convert",
  "parse", "fetch", "read", "load", "lookup", "normaliz", "translat",
  "rephrase", "rewrite", "tag", "label", "render",
];
const DEFAULT_REASONING = [
  "search", "analy", "plan", "decid", "decision", "orchestrat", "solve",
  "reason", "validat", "evaluat", "review", "audit", "diagnos", "debug",
  "architect", "design", "strateg", "investigat", "assess", "judge",
  "verify", "critique", "infer", "deduc",
];

/**
 * Compile a keyword into a case-insensitive "starts a word" pattern.
 *
 * A word start is any position not preceded by a letter. Unlike `\b`, this
 * also fires after `_`, digits and punctuation, so snake_case skill names such
 * as `data_extract` match, as do keywords that begin with a non-word character.
 */
function keywordPattern(keyword: string): RegExp {
  const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return new RegExp(`(?<!\\p{L})${escaped}`, "iu");
}

// The default tables never change, so compile them once instead of per call.
// None of these patterns carries the `g` flag, so `test` keeps no state.
const DEFAULT_LIGHTWEIGHT_PATTERNS = DEFAULT_LIGHTWEIGHT.map(keywordPattern);
const DEFAULT_REASONING_PATTERNS = DEFAULT_REASONING.map(keywordPattern);

/** Type guard for values that claim to be a tier (e.g. parsed from YAML). */
function isModelTier(value: unknown): value is ModelTier {
  return value === "lightweight" || value === "reasoning";
}

/**
 * Report whether any usable keyword starts a word in `text`.
 *
 * Non-string and blank entries are skipped: a non-string would throw on
 * `.replace`, and an empty keyword would otherwise match almost any text.
 */
function matchesAny(text: string, keywords: readonly unknown[]): boolean {
  for (const kw of keywords) {
    if (typeof kw !== "string") continue;
    const trimmed = kw.trim();
    if (trimmed === "") continue;
    if (keywordPattern(trimmed).test(text)) return true;
  }
  return false;
}

/**
 * Read the configured model id for a tier.
 *
 * @returns the trimmed id, or undefined when it is missing, blank or not a string.
 */
function configuredModel(routing: RoutingConfig | undefined, tier: ModelTier): string | undefined {
  const value: unknown = routing?.[tier];
  if (typeof value !== "string") return undefined;
  const trimmed = value.trim();
  return trimmed === "" ? undefined : trimmed;
}

/**
 * Classify a task into a complexity tier. User-defined rules (from
 * model.routing.rules) take precedence over the built-in defaults. When a task
 * matches neither — or matches both — it defaults to "reasoning" so that
 * reasoning quality is never sacrificed to save cost.
 *
 * Malformed rules (null entries, unknown tiers, non-array `match`) are ignored
 * rather than thrown on, so a bad config degrades to the built-in defaults.
 *
 * @param classifyText text describing the task; non-strings are treated as "".
 * @param rules optional user overrides, evaluated in declaration order.
 * @returns the tier the task should run on.
 */
export function classifyTaskTier(
  classifyText: string,
  rules?: Array<{ tier: ModelTier; match: string[] }>,
): ModelTier {
  const text = typeof classifyText === "string" ? classifyText : "";

  // User overrides first, in declaration order.
  if (Array.isArray(rules)) {
    for (const rule of rules) {
      if (!rule || !isModelTier(rule.tier) || !Array.isArray(rule.match)) continue;
      if (matchesAny(text, rule.match)) return rule.tier;
    }
  }

  // Reasoning is checked first so a task matching both tables stays on reasoning.
  if (DEFAULT_REASONING_PATTERNS.some((re) => re.test(text))) return "reasoning";
  if (DEFAULT_LIGHTWEIGHT_PATTERNS.some((re) => re.test(text))) return "lightweight";

  // Unknown → keep quality high.
  return "reasoning";
}

/**
 * Resolve a model reference that may be a routing-tier alias
 * ("lightweight"/"reasoning") or a literal "provider:model" id.
 *
 * @param ref alias or model id; surrounding whitespace is ignored.
 * @param routing routing config used to expand aliases.
 * @returns the concrete model id, or undefined when `ref` is missing/blank or
 *   names a tier that has no configured model.
 */
export function resolveModelAlias(ref: string | undefined, routing?: RoutingConfig): string | undefined {
  if (typeof ref !== "string") return undefined;
  const name = ref.trim();
  if (name === "") return undefined;
  if (isModelTier(name)) return configuredModel(routing, name);
  return name;
}

/**
 * Decide which model a task should run on. Precedence:
 *   1. explicit per-step model   (source: "step")
 *   2. per-skill declared model  (source: "skill")
 *   3. automatic classification  (source: "auto") — when routing is enabled
 *   4. primary/preferred model   (source: "fallback")
 *
 * Automatic routing is active only when a routing block is present, not
 * disabled, and configures at least one tier model. If classification picks a
 * tier with no configured model, it falls through to the primary model
 * (fallback on routing failure).
 *
 * @param input the step/skill overrides, classification text and config.
 * @returns the chosen model together with the tier and decision source.
 */
export function resolveRoutedModel(input: RouteInput): RouteResult {
  const { stepModel, skillModel, classifyText, routing, primaryModel } = input;

  // 1. Explicit per-step override.
  const fromStep = resolveModelAlias(stepModel, routing);
  if (fromStep) return { model: fromStep, tier: null, source: "step" };

  // 2. Per-skill declared default.
  const fromSkill = resolveModelAlias(skillModel, routing);
  if (fromSkill) return { model: fromSkill, tier: null, source: "skill" };

  // 3. Automatic classification (opt-in via a routing block).
  const hasTierModel =
    configuredModel(routing, "lightweight") !== undefined ||
    configuredModel(routing, "reasoning") !== undefined;
  if (routing && routing.enabled !== false && hasTierModel) {
    const tier = classifyTaskTier(classifyText, routing.rules);
    const model = configuredModel(routing, tier);
    if (model) return { model, tier, source: "auto" };
  }

  // 4. Fallback to the primary model.
  return { model: primaryModel, tier: null, source: "fallback" };
}
"../model-routing.js"; const dim = (s: string) => `\x1b[2m${s}\x1b[0m`; const bold = (s: string) => `\x1b[1m${s}\x1b[0m`; @@ -638,10 +640,16 @@ export async function startVoiceServer(opts: VoiceServerOptions): Promise<() => const port = opts.port || 3333; let agentName = "GitAgent"; + // Auto Model Routing config from agent.yaml (model.routing). Issue #48. + let modelRouting: RoutingConfig | undefined; try { const yamlRaw = readFileSync(join(resolve(opts.agentDir), "agent.yaml"), "utf-8"); const m = yamlRaw.match(/^name:\s*(.+)$/m); if (m) agentName = m[1].trim(); + const parsed = yaml.load(yamlRaw) as any; + if (parsed?.model?.routing && typeof parsed.model.routing === "object") { + modelRouting = parsed.model.routing as RoutingConfig; + } } catch { /* fallback to default */ } // Re-read on every request so `npm run build` is picked up live without a server restart. // The file sits in the OS page cache, so the per-request cost is negligible. @@ -830,8 +838,19 @@ export async function startVoiceServer(opts: VoiceServerOptions): Promise<() => sendToBrowser({ type: "transcript", role: "assistant", text: `Running flow: ${flow.name} (${flow.steps.length} steps)` }); + // Per-skill default model from each skill's SKILL.md frontmatter (`model:`). + // Used as a fallback when a step doesn't set its own `model`. + const skillModels = new Map(); + for (const s of await discoverSkills(opts.agentDir)) { + if (s.model) skillModels.set(s.name, s.model); + } + let runningContext = userContext; + // Observability for auto model routing (issue #48): per-step model + // selection plus token/cost totals, summarized in the execution log. 
+ const routeLog: Array<{ step: number; skill: string; model: string; tier: string; source: string; tokens: number; costUsd: number }> = []; + for (let i = 0; i < flow.steps.length; i++) { const step = flow.steps[i]; @@ -859,7 +878,22 @@ export async function startVoiceServer(opts: VoiceServerOptions): Promise<() => continue; } - sendToBrowser({ type: "agent_working" as any, query: `Step ${i + 1}/${flow.steps.length}: ${step.skill}` } as any); + // Auto model routing (issue #48): classify the task by complexity and + // route it to the appropriate model. Explicit per-step model wins, then + // the skill's declared default, then automatic classification, then the + // primary model as fallback. + const route = resolveRoutedModel({ + stepModel: step.model, + skillModel: skillModels.get(step.skill), + classifyText: `${step.skill} ${step.prompt}`, + routing: modelRouting, + primaryModel: opts.model, + }); + const stepModel = route.model; + const routeNote = route.source === "auto" ? `auto/${route.tier}` : route.source; + + sendToBrowser({ type: "agent_working" as any, + query: `Step ${i + 1}/${flow.steps.length}: ${step.skill}${stepModel ? ` (${stepModel} · ${routeNote})` : ""}` } as any); const prompt = `Use the skill "${step.skill}" (load it with /skill:${step.skill}). ${step.prompt.replace(/\{input\}/g, userContext)} @@ -870,22 +904,50 @@ ${runningContext}`; const result = query({ prompt, dir: opts.agentDir, - model: opts.model, + model: stepModel, env: opts.env, }); let stepOutput = ""; + let stepTokens = 0; + let stepCost = 0; for await (const msg of result) { if (msg.type === "assistant" && msg.content) stepOutput += msg.content; + if (msg.type === "assistant" && msg.usage) { + stepTokens += msg.usage.totalTokens ?? 0; + stepCost += msg.usage.costUsd ?? 
0; + } if (msg.type === "tool_use") sendToBrowser({ type: "tool_call", toolName: msg.toolName, args: msg.args } as any); if (msg.type === "tool_result") sendToBrowser({ type: "tool_result", toolName: msg.toolName, content: msg.content, isError: msg.isError } as any); } + routeLog.push({ + step: i + 1, skill: step.skill, model: stepModel ?? "(default)", + tier: route.tier ?? "-", source: route.source, tokens: stepTokens, costUsd: stepCost, + }); + runningContext += `\n\n[Step ${i + 1} result (${step.skill})]: ${stepOutput}`; sendToBrowser({ type: "agent_done" as any, result: `Step ${i + 1} complete` } as any); } - sendToBrowser({ type: "transcript", role: "assistant", text: `Flow "${flow.name}" completed.` }); + // Routing summary — model selected per task plus token/cost totals (issue #48). + if (routeLog.length > 0) { + const totalTokens = routeLog.reduce((a, r) => a + r.tokens, 0); + const totalCost = routeLog.reduce((a, r) => a + r.costUsd, 0); + const autoSteps = routeLog.filter((r) => r.source === "auto"); + const lightCount = autoSteps.filter((r) => r.tier === "lightweight").length; + console.log(dim(`[routing] Flow "${flow.name}" summary — ${routeLog.length} steps, ${totalTokens} tokens, $${totalCost.toFixed(4)}`)); + for (const r of routeLog) { + console.log(dim(`[routing] step ${r.step} ${r.skill}: ${r.model} [${r.source}${r.tier !== "-" ? "/" + r.tier : ""}] ${r.tokens} tok $${r.costUsd.toFixed(4)}`)); + } + const autoNote = autoSteps.length > 0 + ? ` · auto-routed ${autoSteps.length} (${lightCount} → lightweight)` + : ""; + sendToBrowser({ type: "transcript", role: "assistant", + text: `Flow "${flow.name}" completed. 
${routeLog.length} steps · ${totalTokens} tokens · $${totalCost.toFixed(4)}${autoNote}` }); + } else { + sendToBrowser({ type: "transcript", role: "assistant", text: `Flow "${flow.name}" completed.` }); + } } // ── File API helpers ──────────────────────────────────────────────── @@ -2555,7 +2617,7 @@ return false; } else if (url.pathname === "/api/flows/save" && req.method === "POST") { const body = await readBody(req); - let parsed: { name: string; description: string; steps: { skill: string; prompt: string; channel?: string }[] }; + let parsed: { name: string; description: string; steps: { skill: string; prompt: string; channel?: string; model?: string }[] }; try { parsed = JSON.parse(body); } catch { return jsonReply(res, 400, { error: "Invalid JSON" }); } if (!parsed.name || !parsed.steps?.length) return jsonReply(res, 400, { error: "Missing name or steps" }); try { diff --git a/src/workflows.ts b/src/workflows.ts index 03fa300..1ed13ab 100644 --- a/src/workflows.ts +++ b/src/workflows.ts @@ -7,6 +7,7 @@ export interface SkillFlowStep { skill: string; prompt: string; channel?: string; + model?: string; } export interface SkillFlowDefinition { @@ -68,6 +69,7 @@ export async function discoverWorkflows(agentDir: string): Promise ({ skill: s.skill, prompt: s.prompt, ...(s.channel ? { channel: s.channel } : {}) })), + steps: flow.steps.map((s) => ({ skill: s.skill, prompt: s.prompt, ...(s.channel ? { channel: s.channel } : {}), ...(s.model ? { model: s.model } : {}) })), }, { lineWidth: 120 }); await writeFile(filePath, content, "utf-8"); return filePath;