Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ export interface AgentManifest {
top_k?: number;
stop_sequences?: string[];
};
routing?: {
enabled?: boolean;
lightweight?: string;
reasoning?: string;
rules?: Array<{ tier: "lightweight" | "reasoning"; match: string[] }>;
};
};
tools: string[];
skills?: string[];
Expand Down
143 changes: 143 additions & 0 deletions src/model-routing.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
// Auto Model Routing (issue #48)
//
// Classifies each task in an agent workflow by complexity and routes it to the
// most appropriate model: lightweight tasks (summarize/extract/classify/
// transform) go to a cheap model, while reasoning-intensive tasks (search,
// planning, decision-making, tool orchestration, complex problem solving)
// stay on the configured reasoning model. Explicit per-step / per-skill model
// settings always win, and anything unresolved falls back to the primary model.

/** Complexity tier a task is classified into; each tier has its own optional model id in `RoutingConfig`. */
export type ModelTier = "lightweight" | "reasoning";

/**
 * Auto-routing settings, read from the `model.routing` block of agent.yaml.
 * Automatic classification only runs when at least one of `lightweight` /
 * `reasoning` is set and `enabled` is not `false`.
 */
export interface RoutingConfig {
/** Master switch. Defaults to true when a routing block is present; only an explicit `false` turns automatic routing off. */
enabled?: boolean;
/** Concrete model id for lightweight tasks, e.g. "openai:gpt-4o-mini". Also the target of the "lightweight" alias. */
lightweight?: string;
/** Concrete model id for reasoning tasks, e.g. "openai:gpt-4o". Also the target of the "reasoning" alias. */
reasoning?: string;
/**
 * User overrides for classification — first matching rule wins. Each `match`
 * entry is a keyword tested case-insensitively against the start of a word.
 */
rules?: Array<{ tier: ModelTier; match: string[] }>;
}

/** Everything `resolveRoutedModel` needs to pick a model for one task. */
export interface RouteInput {
/** Explicit per-step model (highest priority). May be a tier alias ("lightweight"/"reasoning") or a model id. */
stepModel?: string;
/** Per-skill default model from SKILL.md frontmatter. May be a tier alias or a model id. */
skillModel?: string;
/** Text used to classify the task (typically skill name + step prompt). */
classifyText: string;
/** Routing configuration from agent.yaml (model.routing). Automatic classification is skipped when absent. */
routing?: RoutingConfig;
/** The agent's primary/preferred model — the ultimate fallback. May be undefined, in which case the result carries no model. */
primaryModel?: string;
}

/** Outcome of `resolveRoutedModel`: the chosen model and how it was chosen. */
export interface RouteResult {
/** Resolved concrete "provider:model" string (undefined → let runtime decide). */
model?: string;
/** The complexity tier, when the model came from automatic classification; null for every other source. */
tier: ModelTier | null;
/**
 * Where the decision came from: "step" (per-step model), "skill" (per-skill
 * model), "auto" (automatic classification) or "fallback" (primary model).
 */
source: "step" | "skill" | "auto" | "fallback";
}

// Built-in keyword lists that sort a task into a tier, taken from the
// task-type table recommended in the issue. Every entry is a word prefix: it
// is matched at the start of a word, so "summ" covers "summarize", "summary"
// and "summarization", while "read" does not fire on "already".

/** Prefixes that mark a task as cheap enough for the lightweight model. */
const DEFAULT_LIGHTWEIGHT: string[] = [
  "summ",
  "extract",
  "classif",
  "transform",
  "format",
  "convert",
  "parse",
  "fetch",
  "read",
  "load",
  "lookup",
  "normaliz",
  "translat",
  "rephrase",
  "rewrite",
  "tag",
  "label",
  "render",
];

/** Prefixes that mark a task as needing the reasoning model. */
const DEFAULT_REASONING: string[] = [
  "search",
  "analy",
  "plan",
  "decid",
  "decision",
  "orchestrat",
  "solve",
  "reason",
  "validat",
  "evaluat",
  "review",
  "audit",
  "diagnos",
  "debug",
  "architect",
  "design",
  "strateg",
  "investigat",
  "assess",
  "judge",
  "verify",
  "critique",
  "infer",
  "deduc",
];

/**
 * Report whether any keyword occurs in `text` at the start of a word.
 *
 * Matching is case-insensitive and every keyword is treated as literal text
 * (regex metacharacters are escaped). Only the leading edge is anchored, so a
 * keyword acts as a prefix: "summ" matches "summary", but "read" does not
 * match "already".
 *
 * Entries that are not non-empty strings are ignored. Keyword lists may come
 * from user-authored configuration, where a stray number or null would make
 * `replace` throw and an empty string would match nearly every text.
 *
 * @param text - Text to search.
 * @param keywords - Literal keyword prefixes to look for.
 * @returns `true` as soon as one keyword matches, otherwise `false`.
 */
function matchesAny(text: string, keywords: string[]): boolean {
  for (const kw of keywords) {
    // Runtime guard: the static type says string, but the data may be unvalidated.
    if (typeof kw !== "string" || kw.length === 0) continue;
    const escaped = kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // `\b` pins the match to a word start; the trailing edge is left open.
    if (new RegExp(`\\b${escaped}`, "i").test(text)) return true;
  }
  return false;
}

/**
 * Classify a task into a complexity tier.
 *
 * User-defined rules (from model.routing.rules) are consulted first, in
 * declaration order; the first rule with a matching keyword decides the tier.
 * Otherwise the built-in keyword lists apply, with the reasoning list checked
 * first so that a task matching both lists stays on "reasoning". A task that
 * matches nothing also defaults to "reasoning", so reasoning quality is never
 * sacrificed to save cost.
 *
 * Rules originate from user-edited configuration and may not have been
 * schema-validated, so malformed input is skipped rather than trusted: a
 * non-array `rules` value, a null/non-object rule, a rule whose `match` is not
 * an array, or a rule whose `tier` is not a known tier.
 *
 * @param classifyText - Text describing the task; a missing/empty value is treated as "".
 * @param rules - Optional user overrides.
 * @returns The tier the task should be routed to.
 */
export function classifyTaskTier(
  classifyText: string,
  rules?: Array<{ tier: ModelTier; match: string[] }>,
): ModelTier {
  const text = classifyText || "";

  // User overrides first, in declaration order.
  if (Array.isArray(rules)) {
    for (const rule of rules) {
      if (!rule || typeof rule !== "object") continue;
      // An unknown tier must not leak out as if it were a valid ModelTier.
      if (rule.tier !== "lightweight" && rule.tier !== "reasoning") continue;
      if (Array.isArray(rule.match) && matchesAny(text, rule.match)) {
        return rule.tier;
      }
    }
  }

  // Reasoning is tested before lightweight so a match on both keeps quality high.
  if (matchesAny(text, DEFAULT_REASONING)) return "reasoning";
  if (matchesAny(text, DEFAULT_LIGHTWEIGHT)) return "lightweight";

  // Unknown → keep quality high.
  return "reasoning";
}

/**
 * Turn a model reference into a concrete model id.
 *
 * The tier names "lightweight" and "reasoning" are aliases for the matching
 * entries of the routing config; any other non-empty reference is taken to be
 * a literal "provider:model" id and returned untouched. The result is
 * undefined when the reference is empty/missing, or when it is an alias whose
 * model is not configured.
 */
export function resolveModelAlias(ref: string | undefined, routing?: RoutingConfig): string | undefined {
  if (!ref) {
    return undefined;
  }
  switch (ref) {
    case "lightweight":
      return routing?.lightweight || undefined;
    case "reasoning":
      return routing?.reasoning || undefined;
    default:
      return ref;
  }
}

/**
 * Pick the model a task should run on.
 *
 * Candidates are tried in this order and the first one that yields a model
 * wins:
 *   1. the explicit per-step model            → source "step"
 *   2. the model declared by the skill        → source "skill"
 *   3. automatic classification of the task   → source "auto"
 *   4. the primary/preferred model            → source "fallback"
 *
 * Step and skill references may be tier aliases; an alias with no configured
 * model simply does not count as a hit. Automatic classification runs only
 * when a routing block is present, is not disabled, and names at least one
 * tier model. If the classified tier has no model configured, the primary
 * model is used instead.
 */
export function resolveRoutedModel(input: RouteInput): RouteResult {
  const routing = input.routing;

  // Levels 1 and 2: explicit overrides, evaluated lazily in priority order.
  const explicit: Array<{ source: "step" | "skill"; ref: string | undefined }> = [
    { source: "step", ref: input.stepModel },
    { source: "skill", ref: input.skillModel },
  ];
  for (const candidate of explicit) {
    const resolved = resolveModelAlias(candidate.ref, routing);
    if (resolved) {
      return { model: resolved, tier: null, source: candidate.source };
    }
  }

  // Level 3: automatic classification (opt-in via a routing block).
  if (routing && routing.enabled !== false && (routing.lightweight || routing.reasoning)) {
    const tier = classifyTaskTier(input.classifyText, routing.rules);
    const routed = tier === "lightweight" ? routing.lightweight : routing.reasoning;
    if (routed) {
      return { model: routed, tier, source: "auto" };
    }
  }

  // Level 4: nothing matched — hand back the primary model (possibly undefined).
  return { model: input.primaryModel, tier: null, source: "fallback" };
}
2 changes: 2 additions & 0 deletions src/skills.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export interface SkillMetadata {
usage_count?: number;
success_count?: number;
failure_count?: number;
model?: string;
}

export interface ParsedSkill extends SkillMetadata {
Expand Down Expand Up @@ -96,6 +97,7 @@ export async function discoverSkills(agentDir: string): Promise<SkillMetadata[]>
if (typeof frontmatter.usage_count === "number") meta.usage_count = frontmatter.usage_count;
if (typeof frontmatter.success_count === "number") meta.success_count = frontmatter.success_count;
if (typeof frontmatter.failure_count === "number") meta.failure_count = frontmatter.failure_count;
if (typeof frontmatter.model === "string") meta.model = frontmatter.model;

skills.push(meta);
}
Expand Down
70 changes: 66 additions & 4 deletions src/voice/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import { discoverWorkflows, loadFlowDefinition, saveFlowDefinition, deleteFlowDe
import { discoverSchedules, saveSchedule, deleteSchedule, updateScheduleMeta } from "../schedules.js";
import { startScheduler, stopScheduler, reloadSchedules, executeScheduledJob } from "../schedule-runner.js";
import cron from "node-cron";
import yaml from "js-yaml";
import { resolveRoutedModel, type RoutingConfig } from "../model-routing.js";

/** Wrap `s` in the ANSI sequences `ESC[2m` … `ESC[0m` (dim on, then reset). */
const dim = (s: string): string => "\x1b[2m" + s + "\x1b[0m";
/** Wrap `s` in the ANSI sequences `ESC[1m` … `ESC[0m` (bold on, then reset). */
const bold = (s: string): string => "\x1b[1m" + s + "\x1b[0m";
Expand Down Expand Up @@ -638,10 +640,16 @@ export async function startVoiceServer(opts: VoiceServerOptions): Promise<() =>

const port = opts.port || 3333;
let agentName = "GitAgent";
// Auto Model Routing config from agent.yaml (model.routing). Issue #48.
let modelRouting: RoutingConfig | undefined;
try {
const yamlRaw = readFileSync(join(resolve(opts.agentDir), "agent.yaml"), "utf-8");
const m = yamlRaw.match(/^name:\s*(.+)$/m);
if (m) agentName = m[1].trim();
const parsed = yaml.load(yamlRaw) as any;
if (parsed?.model?.routing && typeof parsed.model.routing === "object") {
modelRouting = parsed.model.routing as RoutingConfig;
}
} catch { /* fallback to default */ }
// Re-read on every request so `npm run build` is picked up live without a server restart.
// The file sits in the OS page cache, so the per-request cost is negligible.
Expand Down Expand Up @@ -830,8 +838,19 @@ export async function startVoiceServer(opts: VoiceServerOptions): Promise<() =>
sendToBrowser({ type: "transcript", role: "assistant",
text: `Running flow: ${flow.name} (${flow.steps.length} steps)` });

// Per-skill default model from each skill's SKILL.md frontmatter (`model:`).
// Used as a fallback when a step doesn't set its own `model`.
const skillModels = new Map<string, string>();
for (const s of await discoverSkills(opts.agentDir)) {
if (s.model) skillModels.set(s.name, s.model);
}

let runningContext = userContext;

// Observability for auto model routing (issue #48): per-step model
// selection plus token/cost totals, summarized in the execution log.
const routeLog: Array<{ step: number; skill: string; model: string; tier: string; source: string; tokens: number; costUsd: number }> = [];

for (let i = 0; i < flow.steps.length; i++) {
const step = flow.steps[i];

Expand Down Expand Up @@ -859,7 +878,22 @@ export async function startVoiceServer(opts: VoiceServerOptions): Promise<() =>
continue;
}

sendToBrowser({ type: "agent_working" as any, query: `Step ${i + 1}/${flow.steps.length}: ${step.skill}` } as any);
// Auto model routing (issue #48): classify the task by complexity and
// route it to the appropriate model. Explicit per-step model wins, then
// the skill's declared default, then automatic classification, then the
// primary model as fallback.
const route = resolveRoutedModel({
stepModel: step.model,
skillModel: skillModels.get(step.skill),
classifyText: `${step.skill} ${step.prompt}`,
routing: modelRouting,
primaryModel: opts.model,
});
const stepModel = route.model;
const routeNote = route.source === "auto" ? `auto/${route.tier}` : route.source;

sendToBrowser({ type: "agent_working" as any,
query: `Step ${i + 1}/${flow.steps.length}: ${step.skill}${stepModel ? ` (${stepModel} · ${routeNote})` : ""}` } as any);

const prompt = `Use the skill "${step.skill}" (load it with /skill:${step.skill}).
${step.prompt.replace(/\{input\}/g, userContext)}
Expand All @@ -870,22 +904,50 @@ ${runningContext}`;
const result = query({
prompt,
dir: opts.agentDir,
model: opts.model,
model: stepModel,
env: opts.env,
});

let stepOutput = "";
let stepTokens = 0;
let stepCost = 0;
for await (const msg of result) {
if (msg.type === "assistant" && msg.content) stepOutput += msg.content;
if (msg.type === "assistant" && msg.usage) {
stepTokens += msg.usage.totalTokens ?? 0;
stepCost += msg.usage.costUsd ?? 0;
}
if (msg.type === "tool_use") sendToBrowser({ type: "tool_call", toolName: msg.toolName, args: msg.args } as any);
if (msg.type === "tool_result") sendToBrowser({ type: "tool_result", toolName: msg.toolName, content: msg.content, isError: msg.isError } as any);
}

routeLog.push({
step: i + 1, skill: step.skill, model: stepModel ?? "(default)",
tier: route.tier ?? "-", source: route.source, tokens: stepTokens, costUsd: stepCost,
});

runningContext += `\n\n[Step ${i + 1} result (${step.skill})]: ${stepOutput}`;
sendToBrowser({ type: "agent_done" as any, result: `Step ${i + 1} complete` } as any);
}

sendToBrowser({ type: "transcript", role: "assistant", text: `Flow "${flow.name}" completed.` });
// Routing summary — model selected per task plus token/cost totals (issue #48).
if (routeLog.length > 0) {
const totalTokens = routeLog.reduce((a, r) => a + r.tokens, 0);
const totalCost = routeLog.reduce((a, r) => a + r.costUsd, 0);
const autoSteps = routeLog.filter((r) => r.source === "auto");
const lightCount = autoSteps.filter((r) => r.tier === "lightweight").length;
console.log(dim(`[routing] Flow "${flow.name}" summary — ${routeLog.length} steps, ${totalTokens} tokens, $${totalCost.toFixed(4)}`));
for (const r of routeLog) {
console.log(dim(`[routing] step ${r.step} ${r.skill}: ${r.model} [${r.source}${r.tier !== "-" ? "/" + r.tier : ""}] ${r.tokens} tok $${r.costUsd.toFixed(4)}`));
}
const autoNote = autoSteps.length > 0
? ` · auto-routed ${autoSteps.length} (${lightCount} → lightweight)`
: "";
sendToBrowser({ type: "transcript", role: "assistant",
text: `Flow "${flow.name}" completed. ${routeLog.length} steps · ${totalTokens} tokens · $${totalCost.toFixed(4)}${autoNote}` });
} else {
sendToBrowser({ type: "transcript", role: "assistant", text: `Flow "${flow.name}" completed.` });
}
}

// ── File API helpers ────────────────────────────────────────────────
Expand Down Expand Up @@ -2555,7 +2617,7 @@ return false;

} else if (url.pathname === "/api/flows/save" && req.method === "POST") {
const body = await readBody(req);
let parsed: { name: string; description: string; steps: { skill: string; prompt: string; channel?: string }[] };
let parsed: { name: string; description: string; steps: { skill: string; prompt: string; channel?: string; model?: string }[] };
try { parsed = JSON.parse(body); } catch { return jsonReply(res, 400, { error: "Invalid JSON" }); }
if (!parsed.name || !parsed.steps?.length) return jsonReply(res, 400, { error: "Missing name or steps" });
try {
Expand Down
5 changes: 4 additions & 1 deletion src/workflows.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export interface SkillFlowStep {
skill: string;
prompt: string;
channel?: string;
model?: string;
}

export interface SkillFlowDefinition {
Expand Down Expand Up @@ -68,6 +69,7 @@ export async function discoverWorkflows(agentDir: string): Promise<WorkflowMetad
skill: String(s.skill || ""),
prompt: String(s.prompt || ""),
...(s.channel ? { channel: String(s.channel) } : {}),
...(s.model ? { model: String(s.model) } : {}),
})),
} : { type: "basic" as const }),
});
Expand Down Expand Up @@ -113,6 +115,7 @@ export async function loadFlowDefinition(filePath: string): Promise<SkillFlowDef
skill: String(s.skill || ""),
prompt: String(s.prompt || ""),
...(s.channel ? { channel: String(s.channel) } : {}),
...(s.model ? { model: String(s.model) } : {}),
})),
};
}
Expand All @@ -130,7 +133,7 @@ export async function saveFlowDefinition(agentDir: string, flow: SkillFlowDefini
const content = yaml.dump({
name: flow.name,
description: flow.description || "",
steps: flow.steps.map((s) => ({ skill: s.skill, prompt: s.prompt, ...(s.channel ? { channel: s.channel } : {}) })),
steps: flow.steps.map((s) => ({ skill: s.skill, prompt: s.prompt, ...(s.channel ? { channel: s.channel } : {}), ...(s.model ? { model: s.model } : {}) })),
}, { lineWidth: 120 });
await writeFile(filePath, content, "utf-8");
return filePath;
Expand Down