open-gitagent · RiteshTiwari1 · Jun 1, 2026
diff --git a/src/loader.ts b/src/loader.ts
@@ -42,6 +42,12 @@ export interface AgentManifest {
 			top_k?: number;
 			stop_sequences?: string[];
 		};
+		routing?: {
+			enabled?: boolean;
+			lightweight?: string;
+			reasoning?: string;
+			rules?: Array<{ tier: "lightweight" | "reasoning"; match: string[] }>;
+		};
 	};
 	tools: string[];
 	skills?: string[];

diff --git a/src/model-routing.ts b/src/model-routing.ts
@@ -0,0 +1,143 @@
+// Auto Model Routing (issue #48)
+//
+// Classifies each task in an agent workflow by complexity and routes it to the
+// most appropriate model: lightweight tasks (summarize/extract/classify/
+// transform) go to a cheap model, while reasoning-intensive tasks (search,
+// planning, decision-making, tool orchestration, complex problem solving)
+// stay on the configured reasoning model. Explicit per-step / per-skill model
+// settings always win, and anything unresolved falls back to the primary model.
+
+export type ModelTier = "lightweight" | "reasoning"; // complexity tier of a task; both names are also accepted as model aliases
+
+export interface RoutingConfig {
+	/** Master switch for automatic routing: only an explicit `false` turns it off. At least one tier model must also be set. */
+	enabled?: boolean;
+	/** Concrete model id for lightweight tasks, e.g. "openai:gpt-4o-mini". Also what the "lightweight" alias resolves to. */
+	lightweight?: string;
+	/** Concrete model id for reasoning tasks, e.g. "openai:gpt-4o". Also what the "reasoning" alias resolves to. */
+	reasoning?: string;
+	/** User overrides for classification, tried in order before the built-in keywords — first matching rule wins. */
+	rules?: Array<{ tier: ModelTier; match: string[] }>;
+}
+
+export interface RouteInput {
+	/** Explicit per-step model (highest priority). May be a tier alias ("lightweight"/"reasoning") or a model id. */
+	stepModel?: string;
+	/** Per-skill default model from SKILL.md frontmatter, used when the step sets none. May be a tier alias or a model id. */
+	skillModel?: string;
+	/** Text used to classify the task (typically skill name + step prompt). Only read when automatic routing runs. */
+	classifyText: string;
+	/** Routing configuration from agent.yaml (model.routing). Omit to skip automatic routing entirely. */
+	routing?: RoutingConfig;
+	/** The agent's primary/preferred model — the ultimate fallback. If unset, the fallback result carries no model. */
+	primaryModel?: string;
+}
+
+export interface RouteResult {
+	/** Resolved concrete "provider:model" string (undefined → let runtime decide). Only a "fallback" result can omit it. */
+	model?: string;
+	/** The complexity tier when source is "auto"; null for "step", "skill" and "fallback" results. */
+	tier: ModelTier | null;
+	/** Where the decision came from: explicit step model, skill default, automatic classification, or the primary-model fallback. */
+	source: "step" | "skill" | "auto" | "fallback";
+}
+
+// Default task-to-tier keyword framework, derived directly from the issue's
+// recommended task-type table. Keywords are case-insensitive word-start prefixes:
+// "summ" hits "summarize", "summary" and "summarization", and "read" does not
+// match "already" — though a prefix still matches longer words such as "ready".
+const DEFAULT_LIGHTWEIGHT = [
+	"summ", "extract", "classif", "transform", "format", "convert",
+	"parse", "fetch", "read", "load", "lookup", "normaliz", "translat",
+	"rephrase", "rewrite", "tag", "label", "render",
+];
+const DEFAULT_REASONING = [
+	"search", "analy", "plan", "decid", "decision", "orchestrat", "solve",
+	"reason", "validat", "evaluat", "review", "audit", "diagnos", "debug",
+	"architect", "design", "strateg", "investigat", "assess", "judge",
+	"verify", "critique", "infer", "deduc",
+];
+
+/** True if any keyword begins a word in `text` (case-insensitive). Blank or non-string keywords never match. */
+function matchesAny(text: string, keywords: string[]): boolean {
+	return keywords.some((kw) => {
+		// Keywords can come from unvalidated config: a non-string would throw on .replace, and "" would leave a bare \b that matches at any word boundary.
+		if (typeof kw !== "string" || kw.trim() === "") return false;
+		return new RegExp(`\\b${kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`, "i").test(text);
+	});
+}
+
+/**
+ * Classify a task into a complexity tier. User-defined rules (from
+ * model.routing.rules) are tried first, in declaration order; malformed rules
+ * (non-array list, empty entry, unknown tier, non-array match) are skipped
+ * rather than throwing, because the config may arrive unvalidated.
+ *
+ * @param classifyText Text describing the task; empty/missing text matches nothing.
+ * @param rules Optional user overrides — the first rule whose keywords match wins.
+ * @returns The matched tier. Text matching neither built-in list — or both —
+ * is "reasoning", so reasoning quality is never sacrificed to save cost.
+ */
+export function classifyTaskTier(
+	classifyText: string,
+	rules?: Array<{ tier: ModelTier; match: string[] }>,
+): ModelTier {
+	const text = classifyText || "";
+	for (const rule of Array.isArray(rules) ? rules : []) {
+		if (!rule || !Array.isArray(rule.match)) continue;
+		if (rule.tier !== "lightweight" && rule.tier !== "reasoning") continue;
+		if (matchesAny(text, rule.match)) return rule.tier;
+	}
+
+	// Reasoning keywords are checked first, so text hitting both lists stays on the reasoning tier.
+	if (matchesAny(text, DEFAULT_REASONING)) return "reasoning";
+	if (matchesAny(text, DEFAULT_LIGHTWEIGHT)) return "lightweight";
+
+	// Unknown → keep quality high.
+	return "reasoning";
+}
+
+/**
+ * Translate a model reference into a concrete model id. The tier aliases
+ * "lightweight" and "reasoning" are looked up in `routing`; every other value
+ * is returned unchanged. Empty refs and unconfigured aliases give undefined.
+ */
+export function resolveModelAlias(ref: string | undefined, routing?: RoutingConfig): string | undefined {
+	if (ref !== "lightweight" && ref !== "reasoning") return ref || undefined;
+	const configured = ref === "lightweight" ? routing?.lightweight : routing?.reasoning;
+	return configured || undefined;
+}
+
+/**
+ * Pick the model for one task. The first source that yields a model wins:
+ *   1. the step's own model             (source: "step")
+ *   2. the skill's declared model       (source: "skill")
+ *   3. automatic tier classification    (source: "auto")
+ *   4. the primary/preferred model      (source: "fallback")
+ *
+ * Classification runs only when a routing block exists, is not disabled, and
+ * names at least one tier model. A tier without a configured model drops
+ * through to the primary model. `tier` is non-null only for "auto" results.
+ */
+export function resolveRoutedModel(input: RouteInput): RouteResult {
+	const routing = input.routing;
+	// 1–2. Explicit choices, highest priority first; tier aliases resolve via routing.
+	const explicit: Array<["step" | "skill", string | undefined]> = [
+		["step", input.stepModel],
+		["skill", input.skillModel],
+	];
+	for (const [source, ref] of explicit) {
+		const chosen = resolveModelAlias(ref, routing);
+		if (chosen) return { model: chosen, tier: null, source };
+	}
+
+	// 3. Automatic classification (opt-in via a routing block).
+	if (routing && routing.enabled !== false && (routing.lightweight || routing.reasoning)) {
+		const tier = classifyTaskTier(input.classifyText, routing.rules);
+		const routed = tier === "lightweight" ? routing.lightweight : routing.reasoning;
+		if (routed) return { model: routed, tier, source: "auto" };
+	}
+
+	// 4. Fallback to the primary model.
+	return { model: input.primaryModel, tier: null, source: "fallback" };
+}
diff --git a/src/skills.ts b/src/skills.ts
@@ -11,6 +11,7 @@ export interface SkillMetadata {
 	usage_count?: number;
 	success_count?: number;
 	failure_count?: number;
+	model?: string;
 }
 
 export interface ParsedSkill extends SkillMetadata {
@@ -96,6 +97,7 @@ export async function discoverSkills(agentDir: string): Promise<SkillMetadata[]>
 		if (typeof frontmatter.usage_count === "number") meta.usage_count = frontmatter.usage_count;
 		if (typeof frontmatter.success_count === "number") meta.success_count = frontmatter.success_count;
 		if (typeof frontmatter.failure_count === "number") meta.failure_count = frontmatter.failure_count;
+		if (typeof frontmatter.model === "string") meta.model = frontmatter.model;
 
 		skills.push(meta);
 	}

diff --git a/src/voice/server.ts b/src/voice/server.ts
@@ -18,6 +18,8 @@ import { discoverWorkflows, loadFlowDefinition, saveFlowDefinition, deleteFlowDe
 import { discoverSchedules, saveSchedule, deleteSchedule, updateScheduleMeta } from "../schedules.js";
 import { startScheduler, stopScheduler, reloadSchedules, executeScheduledJob } from "../schedule-runner.js";
 import cron from "node-cron";
+import yaml from "js-yaml";
+import { resolveRoutedModel, type RoutingConfig } from "../model-routing.js";
 
 const dim = (s: string) => `\x1b[2m${s}\x1b[0m`;
 const bold = (s: string) => `\x1b[1m${s}\x1b[0m`;
@@ -638,10 +640,16 @@ export async function startVoiceServer(opts: VoiceServerOptions): Promise<() =>
 
 	const port = opts.port || 3333;
 	let agentName = "GitAgent";
+	// Auto Model Routing config from agent.yaml (model.routing). Issue #48.
+	let modelRouting: RoutingConfig | undefined;
 	try {
 		const yamlRaw = readFileSync(join(resolve(opts.agentDir), "agent.yaml"), "utf-8");
 		const m = yamlRaw.match(/^name:\s*(.+)$/m);
 		if (m) agentName = m[1].trim();
+		const parsed = yaml.load(yamlRaw) as any;
+		if (parsed?.model?.routing && typeof parsed.model.routing === "object") {
+			modelRouting = parsed.model.routing as RoutingConfig;
+		}
 	} catch { /* fallback to default */ }
 	// Re-read on every request so `npm run build` is picked up live without a server restart.
 	// The file sits in the OS page cache, so the per-request cost is negligible.
@@ -830,8 +838,19 @@ export async function startVoiceServer(opts: VoiceServerOptions): Promise<() =>
 		sendToBrowser({ type: "transcript", role: "assistant",
 			text: `Running flow: ${flow.name} (${flow.steps.length} steps)` });
 
+		// Per-skill default model from each skill's SKILL.md frontmatter (`model:`).
+		// Used as a fallback when a step doesn't set its own `model`.
+		const skillModels = new Map<string, string>();
+		for (const s of await discoverSkills(opts.agentDir)) {
+			if (s.model) skillModels.set(s.name, s.model);
+		}
+
 		let runningContext = userContext;
 
+		// Observability for auto model routing (issue #48): per-step model
+		// selection plus token/cost totals, summarized in the execution log.
+		const routeLog: Array<{ step: number; skill: string; model: string; tier: string; source: string; tokens: number; costUsd: number }> = [];
+
 		for (let i = 0; i < flow.steps.length; i++) {
 			const step = flow.steps[i];
 
@@ -859,7 +878,22 @@ export async function startVoiceServer(opts: VoiceServerOptions): Promise<() =>
 				continue;
 			}
 
-			sendToBrowser({ type: "agent_working" as any, query: `Step ${i + 1}/${flow.steps.length}: ${step.skill}` } as any);
+			// Auto model routing (issue #48): classify the task by complexity and
+			// route it to the appropriate model. Explicit per-step model wins, then
+			// the skill's declared default, then automatic classification, then the
+			// primary model as fallback.
+			const route = resolveRoutedModel({
+				stepModel: step.model,
+				skillModel: skillModels.get(step.skill),
+				classifyText: `${step.skill} ${step.prompt}`,
+				routing: modelRouting,
+				primaryModel: opts.model,
+			});
+			const stepModel = route.model;
+			const routeNote = route.source === "auto" ? `auto/${route.tier}` : route.source;
+
+			sendToBrowser({ type: "agent_working" as any,
+				query: `Step ${i + 1}/${flow.steps.length}: ${step.skill}${stepModel ? ` (${stepModel} · ${routeNote})` : ""}` } as any);
 
 			const prompt = `Use the skill "${step.skill}" (load it with /skill:${step.skill}).
 ${step.prompt.replace(/\{input\}/g, userContext)}
@@ -870,22 +904,50 @@ ${runningContext}`;
 			const result = query({
 				prompt,
 				dir: opts.agentDir,
-				model: opts.model,
+				model: stepModel,
 				env: opts.env,
 			});
 
 			let stepOutput = "";
+			let stepTokens = 0;
+			let stepCost = 0;
 			for await (const msg of result) {
 				if (msg.type === "assistant" && msg.content) stepOutput += msg.content;
+				if (msg.type === "assistant" && msg.usage) {
+					stepTokens += msg.usage.totalTokens ?? 0;
+					stepCost += msg.usage.costUsd ?? 0;
+				}
 				if (msg.type === "tool_use") sendToBrowser({ type: "tool_call", toolName: msg.toolName, args: msg.args } as any);
 				if (msg.type === "tool_result") sendToBrowser({ type: "tool_result", toolName: msg.toolName, content: msg.content, isError: msg.isError } as any);
 			}
 
+			routeLog.push({
+				step: i + 1, skill: step.skill, model: stepModel ?? "(default)",
+				tier: route.tier ?? "-", source: route.source, tokens: stepTokens, costUsd: stepCost,
+			});
+
 			runningContext += `\n\n[Step ${i + 1} result (${step.skill})]: ${stepOutput}`;
 			sendToBrowser({ type: "agent_done" as any, result: `Step ${i + 1} complete` } as any);
 		}
 
-		sendToBrowser({ type: "transcript", role: "assistant", text: `Flow "${flow.name}" completed.` });
+		// Routing summary — model selected per task plus token/cost totals (issue #48).
+		if (routeLog.length > 0) {
+			const totalTokens = routeLog.reduce((a, r) => a + r.tokens, 0);
+			const totalCost = routeLog.reduce((a, r) => a + r.costUsd, 0);
+			const autoSteps = routeLog.filter((r) => r.source === "auto");
+			const lightCount = autoSteps.filter((r) => r.tier === "lightweight").length;
+			console.log(dim(`[routing] Flow "${flow.name}" summary — ${routeLog.length} steps, ${totalTokens} tokens, $${totalCost.toFixed(4)}`));
+			for (const r of routeLog) {
+				console.log(dim(`[routing]   step ${r.step} ${r.skill}: ${r.model} [${r.source}${r.tier !== "-" ? "/" + r.tier : ""}] ${r.tokens} tok $${r.costUsd.toFixed(4)}`));
+			}
+			const autoNote = autoSteps.length > 0
+				? ` · auto-routed ${autoSteps.length} (${lightCount} → lightweight)`
+				: "";
+			sendToBrowser({ type: "transcript", role: "assistant",
+				text: `Flow "${flow.name}" completed. ${routeLog.length} steps · ${totalTokens} tokens · $${totalCost.toFixed(4)}${autoNote}` });
+		} else {
+			sendToBrowser({ type: "transcript", role: "assistant", text: `Flow "${flow.name}" completed.` });
+		}
 	}
 
 	// ── File API helpers ────────────────────────────────────────────────
@@ -2555,7 +2617,7 @@ return false;
 
 		} else if (url.pathname === "/api/flows/save" && req.method === "POST") {
 			const body = await readBody(req);
-			let parsed: { name: string; description: string; steps: { skill: string; prompt: string; channel?: string }[] };
+			let parsed: { name: string; description: string; steps: { skill: string; prompt: string; channel?: string; model?: string }[] };
 			try { parsed = JSON.parse(body); } catch { return jsonReply(res, 400, { error: "Invalid JSON" }); }
 			if (!parsed.name || !parsed.steps?.length) return jsonReply(res, 400, { error: "Missing name or steps" });
 			try {

diff --git a/src/workflows.ts b/src/workflows.ts
@@ -7,6 +7,7 @@ export interface SkillFlowStep {
 	skill: string;
 	prompt: string;
 	channel?: string;
+	model?: string;
 }
 
 export interface SkillFlowDefinition {
@@ -68,6 +69,7 @@ export async function discoverWorkflows(agentDir: string): Promise<WorkflowMetad
 								skill: String(s.skill || ""),
 								prompt: String(s.prompt || ""),
 								...(s.channel ? { channel: String(s.channel) } : {}),
+								...(s.model ? { model: String(s.model) } : {}),
 							})),
 						} : { type: "basic" as const }),
 					});
@@ -113,6 +115,7 @@ export async function loadFlowDefinition(filePath: string): Promise<SkillFlowDef
 			skill: String(s.skill || ""),
 			prompt: String(s.prompt || ""),
 			...(s.channel ? { channel: String(s.channel) } : {}),
+			...(s.model ? { model: String(s.model) } : {}),
 		})),
 	};
 }
@@ -130,7 +133,7 @@ export async function saveFlowDefinition(agentDir: string, flow: SkillFlowDefini
 	const content = yaml.dump({
 		name: flow.name,
 		description: flow.description || "",
-		steps: flow.steps.map((s) => ({ skill: s.skill, prompt: s.prompt, ...(s.channel ? { channel: s.channel } : {}) })),
+		steps: flow.steps.map((s) => ({ skill: s.skill, prompt: s.prompt, ...(s.channel ? { channel: s.channel } : {}), ...(s.model ? { model: s.model } : {}) })),
 	}, { lineWidth: 120 });
 	await writeFile(filePath, content, "utf-8");
 	return filePath;