From cb8b710ee7b18f7e7464d1796176ba2787aa4ed4 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 15:30:24 +0000 Subject: [PATCH 01/54] docs(adr): propose native MCP client for openab-agent Adds ADR for in-core rmcp + progressive-disclosure meta-tool, deferred to symmetry with the Skills extension pattern from PR #955. Memory analysis rules out the sidecar alternative; per-session config refresh replaces file-watcher hot reload to drop ~150 LOC of race-condition hotspot. Co-Authored-By: Claude Opus 4.7 --- docs/adr/openab-agent-mcp.md | 605 +++++++++++++++++++++++++++++++++++ 1 file changed, 605 insertions(+) create mode 100644 docs/adr/openab-agent-mcp.md diff --git a/docs/adr/openab-agent-mcp.md b/docs/adr/openab-agent-mcp.md new file mode 100644 index 000000000..bd078f604 --- /dev/null +++ b/docs/adr/openab-agent-mcp.md @@ -0,0 +1,605 @@ +# ADR: openab-agent — MCP Client Support + +## 1. Context & Motivation + +`openab-agent` is the native Rust coding agent shipped with OpenAB (Cargo workspace member `openab-agent/`, introduced 2026-05-26 via PR #924, targeted at the v0.8.4-beta series). Its `docs/adr/openab-agent.md` charter commits to a small surface: 4 built-in tools (`read`, `write`, `edit`, `bash`), a ~500-token system prompt, no LLM SDK dependency, multi-model via thin HTTP. PR #955 added `Skills` support (`openab-agent/src/skills.rs`, 224 LOC, zero new crate dependencies) as the first extension mechanism — descriptor-only injection plus on-demand load via the existing `read` tool. + +The agent currently has **no MCP (Model Context Protocol) client**. This ADR proposes one. + +### 1.1 Why MCP for openab-agent + +- **Ecosystem leverage.** Every Postgres/GitHub/Figma/Jira/Slack integration users will ask for already exists as an MCP server (mcpbundles.com tracks ~9k tools across ~1.4k providers as of 2026-Q2). Re-implementing each as a Skill or built-in tool is duplicative. 
+- **Parity with peer agents.** Claude Code, Codex CLI, Cursor, Cline, Goose, opencode, OpenHands, Kiro, Junie, Roo Code all ship MCP clients. Users coming from any of these expect `mcpServers` config to "just work". +- **Skills cannot replace MCP.** Per Anthropic's framing — **Skills = procedural (how to do); MCP = connectivity (where data/tools live)**. Skills wrap CLI tools; MCP handles network, auth, streaming, server-side state. + +### 1.2 Why now + +Skills landed in PR #955. The repo's design pattern for "first-tier-but-tiny" extension is now established. MCP is the natural next layer. + +### 1.3 Prior internal attempts + +Four MCP PRs to upstream `openabdev/openab` have closed without merging: + +| PR | Title | State | Scope | +|---|---|---|---| +| #329, #330 | `feat(mcp): inject per-user MCP servers from Discord profiles into ACP sessions` | CLOSED | Broker forward | +| #345 | `feat: inject per-user MCP servers into ACP sessions` | CLOSED | Broker forward | +| #903 | `feat(agent): forward configured MCP servers` | CLOSED | Broker forward | + +All four targeted the broker layer — pass MCP server config through to the backing CLI (Claude Code / Codex / Cursor) and let that CLI handle MCP. **None addressed native MCP support inside `openab-agent` itself.** This ADR is scoped to the native agent. + +Issue #753 remains open and is broker-side (`[agent].inherit_cloud_mcp_servers` opt-out). This ADR does not change broker behavior. + +--- + +## 2. Goals & Non-Goals + +### In scope + +- MCP **client** support inside `openab-agent` +- Transports: stdio (local servers — Anthropic reference, npm/pypi community) and Streamable HTTP (vendor-hosted SaaS — Atlassian, Figma, Linear, Notion, Sentry, etc.). HTTP+SSE intentionally omitted — superseded by Streamable HTTP in MCP spec 2025-11-25 and being actively sunset by vendors (Atlassian deadline 2026-06-30). See §3.8 for landscape. 
+- OAuth login flow for MCP servers requiring it +- Per-session lifecycle with idle eviction +- Per-session config refresh — new ACP session re-reads `mcpServers` from disk (no file watcher, no mid-session reload; openab spawns short-lived sessions per thread so process restart is rarely needed) +- Progressive-disclosure tool surface (single meta-tool, not flat fan-out) +- Reuse of existing `src/auth.rs` PKCE / TokenStore where possible + +### Out of scope + +- MCP **server** functionality (openab-agent is an MCP host/client only; it exposes no MCP server) +- WASM / cdylib plugin runtime +- Sidecar / out-of-process MCP bridge +- Per-thread MCP isolation (broker concern, not agent) +- Replacing Skills (Skills and MCP coexist) + +--- + +## 3. Prior Art Survey + +Per `docs/adr/pr-contribution-guidelines.md`, OpenClaw and Hermes Agent are the mandatory references for architectural PRs. OpenClaw was evaluated and found **not applicable to this ADR**: it is a multi-channel messaging gateway (chat platforms ↔ MCP), not a coding agent. Its substantial MCP code (~2,900 LOC across `src/agents/mcp-*`, `src/config/mcp-*`, `src/mcp/`) addresses channel bridging rather than agent-side tool calling. The closer comparison for a coding-agent MCP client is **opencode (§3.2)**, included in addition to Hermes Agent. + +Five projects are surveyed below. 
Each contributes a design pattern the chosen architecture borrows from: + +| § | Project | Borrowed pattern | +|---|---|---| +| 3.1 | Hermes Agent | Circuit breaker (per-server fail threshold + cooldown) | +| 3.2 | opencode | Per-server status enum + RFC 7591 dynamic OAuth | +| 3.3 | pi-mcp-adapter | Single `mcp` meta-tool with action dispatch (progressive disclosure) | +| 3.4 | Goose | MCP-as-primary-extension validation in a Rust codebase | +| 3.5 | OpenHands | `filter_tools_regex` per-server tool scoping | + +### 3.1 Hermes Agent (mandatory reference) + +- Repo: https://github.com/NousResearch/hermes-agent (Python, Apache 2.0) +- MCP module: ~5,175 LOC across 3 files (`mcp_tool.py` + 2 OAuth modules) +- SDK: official `mcp==1.26.0` +- Transports: stdio + Streamable HTTP + SSE +- Tool naming: `mcp_{server}_{tool}` (single-underscore separators, no `__` boundary marker) +- Lifecycle: per-server long-lived `asyncio.Task` on dedicated background event loop +- Lazy loading: eager connect, but background-thread discovery with 0.75s join — non-blocking +- Hot reload: mtime-poll on `~/.hermes/config.yaml` + `/reload-mcp` slash command +- OAuth: mtime-based disk-watch for cross-process token refresh +- **Notable**: ships a real circuit breaker — threshold 3 failures / 60s cooldown / half-open probe state. The only project surveyed that does so. 
+ +### 3.2 opencode (anomalyco/opencode, formerly sst/opencode) + +- Repo: https://github.com/anomalyco/opencode (TypeScript, MIT) — `sst/opencode` 301-redirects here after org transfer +- **Closest coding-agent comparison to openab-agent** +- MCP module: ~1,664 LOC across 5 files (`mcp/`, `auth.ts`, OAuth provider/callback, config) +- SDK: `@modelcontextprotocol/sdk@1.27.1` +- Transports: stdio + Streamable HTTP + SSE +- Tool naming: `{sanitized_client}_{sanitized_tool}` (single underscore) +- Lifecycle: shared singleton service via Effect `Layer`; one `Client` per server +- Lazy loading: eager connect with `concurrency: "unbounded"`; per-server status union prevents one bad server from crashing others +- Hot reload: subscribes to MCP spec's `notifications/tools/list_changed`; **no file watcher** for config — config change still requires restart +- OAuth: RFC 7591 dynamic client registration, callback `http://127.0.0.1:19876/mcp/oauth/callback`, EffectFlock cross-process locking on token store +- **Known issues** (cited as architectural cautionary tales): #11868 (113 GB virtual-memory leak, Windows v1.1.21), #7261 (heap not released + MCP orphan processes, v1.1.6), #13041 (per-session MCP+LSP duplication across concurrent sessions) — all rooted in child-process lifecycle, not protocol code + +### 3.3 pi-mcp-adapter + +- Repo: https://github.com/nicobailon/pi-mcp-adapter (TypeScript, MIT) +- An out-of-tree extension for the Pi coding agent (`pi.extensions`) — Pi itself has **no native MCP** +- MCP module: ~3,661 LOC (server-manager, proxy-modes, direct-tools, OAuth) +- SDK: `@modelcontextprotocol/sdk@^1.25.1` + `@modelcontextprotocol/ext-apps@^1.2.2` +- Transports: stdio + Streamable HTTP + SSE +- **Notable — the reason this is cited**: ships a **single `mcp` meta-tool** with sub-actions (`connect`, `describe`, `search`, `list`, `call`, `status`). All MCP capability is exposed through this one tool. 
Lazy connect happens inside `lazyConnect()` on first action that needs it. This is the **progressive-disclosure pattern** this ADR adopts. + +### 3.4 Goose (block / aaif-goose) + +- Repo: https://github.com/block/goose → https://github.com/aaif-goose/goose (Rust, Apache 2.0) +- **Most relevant precedent: a Rust coding agent built around MCP** +- Launched Jan 2025 with MCP as the *only* extension surface (no first-party plugin API to retrofit) +- Hand-rolled `mcp-client` crate (predated official Rust SDK) +- Per-session `Agent` owns an `ExtensionManager` that spawns MCP servers (stdio/SSE) as child processes +- Tools flattened into one namespace; extension name used as prefix to avoid collisions +- Supports `tools/list_changed` for hot reload +- Precedent for a Rust agent shipping MCP as the primary extension surface without WASM / cdylib / sidecar plumbing. + +### 3.5 OpenHands (All-Hands-AI) + +- Repo: https://github.com/OpenHands/OpenHands (Python, MIT) +- SDK: FastMCP (jlowin/fastmcp), not the reference SDK +- **Notable**: per-agent `filter_tools_regex` config — subset a server's tools without modifying the server. OAuth tokens cached under `~/.fastmcp/oauth-mcp-client-cache/` with auto-refresh; explicit "incompatible with headless" caveat for browser-based auth. +- Cited for OAuth + tool-surface scoping patterns where Hermes/opencode/Pi are weaker. 
+ +### 3.6 Comparison matrix + +| | Hermes | opencode | pi-mcp-adapter | Goose | OpenHands | +|---|---|---|---|---|---| +| Language | Python | TS | TS | Rust | Python | +| SDK | mcp 1.26 | sdk 1.27 | sdk 1.25 | hand-rolled | FastMCP | +| Transports | stdio+HTTP+SSE | stdio+HTTP+SSE | stdio+HTTP+SSE | stdio+SSE | stdio+HTTP | +| Tool naming | `mcp_s_t` | `s_t` | configurable | ext-prefix | filter | +| Lifecycle | per-srv task | shared singleton | per-ext + idle 10m | per-session ExtensionMgr | per-agent | +| Lazy connect | no | no | ✅ meta | no (eager) | no | +| Hot reload | mtime+cmd | `tools/list_changed` | session boundary | `tools/list_changed` | no | +| OAuth | mtime disk-watch | RFC7591 + Flock | PKCE+auto | ? | FastMCP cache | +| Circuit breaker | ✅ 3/60s | no | partial | no | no | +| LOC | ~5,175 | ~1,664 | ~3,661 | unmeasured | unmeasured | + +### 3.7 Skills vs MCP — industry research + +Anthropic positions the two as **complementary**, not competing. The 2025-2026 consensus across practitioner blogs (Simon Willison, Anthropic engineering, StackOne) converged on: + +``` + Skills MCP + ────── ──── + Procedural knowledge Live connectivity + Markdown + YAML frontmatter Protocol spec + SDK + ~100 tokens/skill in prompt 10K-17K tokens/server in prompt + Body lazy-loaded via read tool All tool schemas eagerly loaded + Local file Server (process or HTTP endpoint) + No auth, no lifecycle OAuth, lifecycle, transports + Open standard (Dec 2025) Linux Foundation steward (late 2025) +``` + +**Adoption**: no major OSS coding agent has rejected MCP in favor of Skills-only (or vice versa). All 11 surveyed agents (Claude Code, Codex CLI, Gemini CLI, Cursor, Cline, Goose, opencode, Junie, Kiro, Roo, GitHub Copilot agent-mode) support both. 
+ +**Cost data**: large MCP server collections have been documented consuming substantial context budget — StackOne benchmarks Sonnet 4.6 at 42% tool-selection accuracy on the unmodified MCP surface vs 80% with their Code Mode wrapper, motivating the spec-level fix in MCP SEP-1576 ("Mitigating Token Bloat in MCP") which proposes progressive disclosure (**not yet ratified**). + +**Implication for this ADR**: progressive disclosure is not optional for openab-agent. The agent's design principle commits to a ~500-token system prompt; a naïve flat MCP integration would 30× that budget. Skills' descriptor-only injection pattern is the precedent. + +### 3.8 Transport landscape & SaaS MCP server adoption + +MCP defines three transport profiles. Their 2026-Q2 status: + +| Transport | Spec status | Where it lives | +|---|---|---| +| **stdio** | Stable | Local child process — Anthropic reference servers, npm/pypi community packages | +| **Streamable HTTP** | Current (MCP spec 2025-11-25), supersedes HTTP+SSE | Vendor-hosted SaaS endpoints | +| **HTTP+SSE** | Deprecated by spec 2025-11-25; vendor sunsets in progress | Legacy fixtures — Atlassian sunsets 2026-06-30 | + +``` + ┌──────────────────────────────── MCP Server Universe ─────────────────────────────────┐ + │ │ + │ ┌─────────────────────────────┐ ┌────────────────────────────────────┐ │ + │ │ LOCAL (majority of registry) │ │ REMOTE (vendor SaaS, growing) │ │ + │ │ │ │ │ │ + │ │ Transport: stdio │ │ Transport: Streamable HTTP │ │ + │ │ │ │ │ │ + │ │ filesystem sqlite │ │ Atlassian Figma Linear │ │ + │ │ postgres puppeteer │ │ Notion Sentry Supabase │ │ + │ │ github fetch │ │ HubSpot Slack Stripe │ │ + │ │ time gitlab │ │ Cloudflare Vercel Neon ... │ │ + │ │ ... 
│ │ │ │ + │ └─────────────────────────────┘ └────────────────────────────────────┘ │ + │ │ + │ ┌────────────────────────────────────────────────────────────────────────────┐ │ + │ │ LEGACY (deprecated, vendor sunsets in progress) │ │ + │ │ Transport: HTTP+SSE │ │ + │ │ e.g. Atlassian https://mcp.atlassian.com/v1/sse (off 2026-06-30) │ │ + │ └────────────────────────────────────────────────────────────────────────────┘ │ + │ │ + └──────────────────────────────────────────────────────────────────────────────────────┘ +``` + +#### Local stdio servers (representative sample) + +Anthropic reference + community packages. All ship as `command + args`; no network endpoint. + +| Server | Implementation | Distribution | +|---|---|---| +| `mcp-server-filesystem` | Node | `@modelcontextprotocol/server-filesystem` (npm) | +| `mcp-server-sqlite` | Python | `mcp-server-sqlite` (pypi) | +| `mcp-server-postgres` | Python | `mcp-server-postgres` (pypi) — local DB | +| `mcp-server-puppeteer` | Node | `@modelcontextprotocol/server-puppeteer` (npm) | +| `mcp-server-github` | Go / Node | `github-mcp-server` (binary) / `@modelcontextprotocol/server-github` (npm) | +| `mcp-server-fetch` | Python | `mcp-server-fetch` (pypi) | +| `mcp-server-time` | Rust | `mcp-server-time` (cargo) | +| `mcp-server-gitlab` | Node | `@modelcontextprotocol/server-gitlab` (npm) | + +#### Vendor-hosted SaaS servers — all Streamable HTTP + +Survey of mainstream public endpoints (2026-Q2). Every active vendor endpoint surveyed is Streamable HTTP. The Atlassian SSE URL is the lone holdout and has a published sunset date. 
+ +| Vendor | Endpoint | Transport | Notes | +|---|---|---|---| +| Atlassian (Rovo) | `https://mcp.atlassian.com/v1/mcp` | Streamable HTTP | Legacy SSE at `/v1/sse` sunset **2026-06-30** | +| Figma | `https://mcp.figma.com/mcp` | Streamable HTTP | OAuth via Figma account | +| Linear | `https://mcp.linear.app/mcp` | Streamable HTTP | OAuth | +| Notion | `https://mcp.notion.com/mcp` | Streamable HTTP | OAuth | +| Sentry | `https://mcp.sentry.dev/mcp` | Streamable HTTP | OAuth | +| Supabase | `https://mcp.supabase.com/mcp` | Streamable HTTP | OAuth | +| HubSpot | `https://mcp.hubspot.com/anthropic` | Streamable HTTP | OAuth | +| Slack | (vendor-hosted) | Streamable HTTP | OAuth | +| Stripe | hosted (see Stripe MCP docs for current path) | Streamable HTTP | API key | +| Cloudflare | multiple endpoints under `*.mcp.cloudflare.com` | Streamable HTTP | OAuth (workers/dns/r2/...) | +| Vercel | `https://mcp.vercel.com/` | Streamable HTTP | OAuth | +| Neon | `https://mcp.neon.tech/` | Streamable HTTP | OAuth | + +**Cover map**: stdio + Streamable HTTP covers all mainstream public MCP endpoints surveyed as of 2026-Q2. SSE-only deployments are legacy fixtures with vendor sunsets in progress; deferred to a hypothetical v2. + +--- + +## 4. Design Decision + +### 4.1 Architectural alternatives compared + +**Alternative A — Naïve flat in-core.** Every MCP tool from every connected server becomes a top-level entry in `tool_definitions()`. Surface explodes from 4 → 150+ tools; system prompt grows ~500 → ~17,000 tokens (5 servers × ~20 tools each, ~160 tokens per descriptor). Hermes Agent and opencode both pay this cost; StackOne benchmarks (§3.7) show tool-selection accuracy drops sharply under naïve flat surfaces. + +**Alternative B — Sidecar / plugin process.** Spawn a separate `openab-mcp-bridge` binary; agent core has no `rmcp` dependency; communicate via stdio JSON-RPC. 
RAM saving is 1-2 MB on a 15-40 MB baseline — noise — but the bridge process itself adds ~15 MB and inherits opencode's documented sidecar failure modes (#11868 113 GB leak / #7261 orphan processes / #13041 per-session duplication). Cost ≫ benefit (see §7). + +**Alternative C — CHOSEN: in-core `rmcp` + progressive-disclosure meta-tool.** `rmcp` enters `Cargo.toml`. Tool surface grows by exactly **1 tool**: `mcp`. All MCP capability (server enumeration, tool discovery, invocation, status) flows through that single tool's `action` field. System prompt grows ~500 → ~600 tokens (+100 for the meta-tool blurb). + +### 4.2 Why C honors openab-agent design principles + +| Principle (`docs/adr/openab-agent.md` §2) | A (flat) | B (sidecar) | **C (chosen)** | +|---|:---:|:---:|:---:| +| Minimal tool surface (4 tools) | ⛔ 150+ | ✅ 4 | ✅ 5 | +| Tiny system prompt (~500 tokens) | ⛔ ~17K | ✅ ~500 | ⚠️ ~600 (+100 over budget; accepted as smallest viable surface) | +| No SDK dependency | ⛔ rmcp | ✅ none | ⚠️ rmcp (+1-2 MB binary, see §7) | +| Multi-model | ✅ | ✅ | ✅ | + +C concedes the "no SDK dependency" principle for a 1-2 MB binary cost. §7 shows that cost is dominated by child-process RAM (5-80 MB per server, depending on implementation language) regardless of architecture, so the concession is dwarfed by usage cost. + +### 4.3 Symmetry with Skills (PR #955) + +Skills is openab's existing "first-tier-but-tiny" extension. 
The mapping is exact: + +``` +┌────────────────────────────┬─────────────────────────────────────┐ +│ Skills (224 LOC, in-core) │ MCP (proposed, in-core) │ +├────────────────────────────┼─────────────────────────────────────┤ +│ Inject metadata only │ Inject 1 meta-tool only │ +│ (name + description) │ (name + action sketch) │ +├────────────────────────────┼─────────────────────────────────────┤ +│ Body load via `read` tool │ Server connect via `mcp` tool │ +│ on agent's demand │ on agent's demand (lazy connect) │ +├────────────────────────────┼─────────────────────────────────────┤ +│ ~100 tokens / 10 skills │ ~100 tokens / N servers │ +├────────────────────────────┼─────────────────────────────────────┤ +│ No new crate dep │ Adds rmcp (1-2 MB binary delta) │ +└────────────────────────────┴─────────────────────────────────────┘ +``` + +Skills' authors weighed "simple in-core mechanism vs plugin abstraction" and chose in-core. The same trade-off applies to MCP: plugin abstraction is ~10× the complexity for negligible RAM saving. + +--- + +## 5. Detailed Design + +### 5.1 Tool surface (4 + 1) + +``` +openab-agent/src/tools.rs::tool_definitions() returns 5 entries: + + [ "read" ] ─── existing, unchanged + [ "write" ] ─── existing, unchanged + [ "edit" ] ─── existing, unchanged + [ "bash" ] ─── existing, unchanged + [ "mcp" ] ─── NEW +``` + +### 5.2 The `mcp` meta-tool schema + +```jsonc +{ + "name": "mcp", + "description": "Interact with configured MCP servers. 
Use action='help' for usage.", + "input_schema": { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["help", "list_servers", "list_tools", + "describe_tool", "call", "status"] + }, + "server": { "type": "string" }, + "tool": { "type": "string" }, + "arguments": { "type": "object" } + }, + "required": ["action"] + } +} +``` + +Per-action contract: + +| action | required fields | returns | +|---|---|---| +| `help` | — | usage doc string | +| `list_servers` | — | `[{ name, status, transport, tools_count }]` | +| `list_tools` | `server` | `[{ name, description }]` | +| `describe_tool` | `server`, `tool` | `{ name, description, input_schema }` | +| `call` | `server`, `tool`, `arguments` | tool's `CallToolResult` | +| `status` | `server?` | per-server health / last error / OAuth state | + +### 5.3 Agent loop interaction + +Typical multi-turn usage (lazy connect at first need, idle eviction after TTL): + +- **Turn 1** — LLM calls `mcp(action: "list_servers")`; no IO, served from config cache. Returns `["github (stdio)", ...]`. +- **Turn 2** — LLM calls `mcp(action: "list_tools", server: "github")`; `lazy_connect("github")` spawns child proc, `peer.list_all_tools()` fetches descriptors. Returns `[{name, description}, ...]`. +- **Turn 3** — LLM calls `mcp(action: "call", server, tool, arguments)`; `peer.call_tool()` invokes. Returns `CallToolResult`. +- **Idle (no MCP call for `idle_ttl`)** — `IdleEvictor` shuts down child proc, drops Peer; config + descriptor cache retained for fast re-connect. 
+ +### 5.4 Module layout + +``` +openab-agent/src/ +├── agent.rs (existing — add 1 match arm in execute_tool) +├── auth.rs (existing — TokenStore reused by mcp/oauth.rs) +├── llm.rs (existing — UNCHANGED, ToolDef is already generic) +├── tools.rs (existing — add `mcp` to tool_definitions()) +├── skills.rs (existing — UNCHANGED) +└── mcp/ (NEW module) + ├── mod.rs (public: McpRuntimeManager, dispatch()) + ├── config.rs (mcpServers schema, ${env:VAR} interpolation) + ├── runtime.rs (per-server lifecycle, lazy connect, idle TTL) + ├── meta_tool.rs (action dispatch: list_servers / list_tools / ...) + ├── oauth.rs (uses src/auth.rs TokenStore; built-in providers) + └── breaker.rs (circuit breaker per server) +``` + +Estimated total: **500-750 LOC** (no `reload.rs`; per-session refresh handled by `McpRuntimeManager::new()` re-reading config at session start). `llm.rs` is unchanged because both Anthropic and OpenAI Responses providers consume the generic `ToolDef` abstraction. + +### 5.5 `rmcp` dependency & features + +```toml +# openab-agent/Cargo.toml +[dependencies] +rmcp = { version = "1.7", default-features = false, features = [ + "client", + "transport-child-process", + "transport-streamable-http-client-reqwest", + "auth", +] } +``` + +- `client` only — we host nothing +- `transport-child-process` — stdio servers (majority of registry, see §3.8) +- `transport-streamable-http-client-reqwest` — vendor-hosted SaaS endpoints (reqwest is already a transitive dep) +- `auth` — OAuth helpers +- `default-features = false` — avoid pulling SSE / server features we don't need (SSE intentionally omitted per §3.8 — superseded by Streamable HTTP in MCP spec 2025-11-25, all surveyed vendors migrated or migrating) + +Binary delta estimate: **+1-2 MB** (see §7). + +### 5.6 Config schema + +Single root key `mcpServers` to match Claude Code / Codex / Cursor / Cline convention. 
Loaded from `.openab/agent/mcp.json` (project) and `~/.openab/agent/mcp.json` (global), project-local takes precedence on name collision. + +```jsonc +{ + "mcpServers": { + "github": { + "type": "stdio", + "command": "github-mcp-server", + "args": ["--repo-token", "${env:GITHUB_TOKEN}"], + "env": { "GH_HOST": "github.com" } + }, + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear" } + }, + "fs": { + "type": "stdio", + "command": "mcp-server-filesystem", + "args": ["/workspace"], + "tool_filter": { "include": ["read_*", "list_*"] } + } + } +} +``` + +- `${env:VAR}` interpolation matches Cursor / Cline; missing var = startup error for that server (others continue) +- `tool_filter` supports `include` / `exclude` glob lists (lifted from OpenHands' `filter_tools_regex`) +- Per-server failure isolated — one bad server does not block agent boot + +### 5.7 Lifecycle + +``` + ┌─────────────────────────────────────┐ + │ McpRuntimeManager (1 per agent) │ + │ │ + │ config: Arc │ + │ servers: Map │ + │ idle_ttl: Duration (default 10m) │ + └─────────────────────────────────────┘ + │ + │ on first call needing server X: + ▼ + ┌─────────────────────────────────────┐ + │ ServerHandle (lazy) │ + │ │ + │ state: Disconnected | Connecting | │ + │ Connected(Peer) | Failed | │ + │ NeedsAuth │ + │ last_used: Instant │ + │ breaker: CircuitBreaker │ + │ tools_cache: Vec │ + └─────────────────────────────────────┘ + │ + ┌─────────────────┼─────────────────┐ + │ │ + ┌───────────┐ ┌───────────┐ + │ child proc│ │ HTTP conn │ + │ (stdio) │ │ (reqwest) │ + └───────────┘ └───────────┘ +``` + +- **Lazy connect**: server is `Disconnected` at boot; transitions to `Connecting → Connected` on first action needing it +- **Idle eviction**: background task evicts servers idle > `idle_ttl` (default 10m, configurable per server). 
State drops to `Disconnected`; tools cache retained for fast re-connect +- **No per-thread isolation**: agent is single-thread-per-session; openab broker handles thread-level concurrency upstream +- **Connection reuse**: while connected, all `mcp call` actions reuse the same `Peer` + +### 5.8 Config refresh model + +Rather than file-watching mid-session, openab-agent re-reads `mcp.json` at session boundaries: + +- **New ACP session** → `McpRuntimeManager::new()` parses `mcp.json` from scratch; ~5 LOC of glue, zero hot-path code +- **Mid-session config edit** → not visible until next session; users re-open the Discord/Slack thread (cheap in openab's per-thread session model) +- **Process restart** → applies config changes globally; rarely needed because broker spawns short-lived agent processes per session + +This drops `notify` crate + lease counter + diff applier (~150 LOC, race-condition hotspot) for an 80% UX coverage. Hermes' `/reload-mcp` slash command (§3.1) is the precedent for "explicit user-triggered reload >> implicit file watcher" in a coding-agent context. + +### 5.9 Error isolation & circuit breaker + +Adopted from Hermes Agent (the only surveyed project that ships one): + +``` + ┌─────────────────────────────────────────┐ + │ CircuitBreaker (per server) │ + │ │ + │ state: Closed | Open | HalfOpen │ + │ fail_threshold: 3 (configurable) │ + │ cooldown: 60s (configurable) │ + └─────────────────────────────────────────┘ + │ + ┌───────────────────────┼───────────────────────┐ + │ │ │ + ▼ ▼ ▼ + 3 fails in 30s 60s elapsed 1 success + ─────────────► ─────────────► ─────────────► + Closed → Open Open → HalfOpen HalfOpen → Closed + (allow 1 probe) + │ + │ probe fails + ▼ + HalfOpen → Open + (reset cooldown) +``` + +While `Open`, `mcp call` returns `{"error":"server unavailable, cooldown 45s remaining"}` immediately — no child-process resurrection attempts, no LLM hang. 
+ +`rmcp` error model maps cleanly: + +| `rmcp` error | meta-tool response | Counts toward breaker? | +|---|---|---| +| `ServiceError::McpError` (protocol) | `{ error: msg, code }` | No (server-level intent) | +| `ServiceError::TransportSend/Closed` | `{ error: "transport", server: ... }` | Yes | +| `CallToolResult { isError: true }` | passed through as result | No (tool-level) | + +--- + +## 6. OAuth + +### 6.1 Shared TokenStore + +`openab-agent/src/auth.rs` already implements hand-rolled PKCE for Codex (`CODEX_AUTHORIZE_URL`, port 1455). The TokenStore (`~/.openab/agent/auth.json`, 0o600) is reused — `mcp/oauth.rs` calls into the same store with namespaced keys (an `mcp:` prefix followed by the server name, vs the existing `codex` key). + +### 6.2 Built-in providers (Phase 2) + +| Provider | Auth URL | Token URL | Callback | Scopes | +|---|---|---|---|---| +| `anthropic-mcp` | `https://claude.ai/oauth/authorize` | `https://platform.claude.com/v1/oauth/token` | `localhost:53692/callback` | `org:create_api_key user:profile user:inference user:sessions:claude_code user:mcp_servers user:file_upload` (subset varies per use) | +| `github-copilot` | (existing pi/anthropic flow) | existing | existing | existing | +| `generic` | from `mcpServers[name].oauth.authorize_url` | from `.oauth.token_url` | dynamically allocated port | from `.oauth.scopes` | + +### 6.3 Custom provider extension point + +Config can declare `oauth: { authorize_url, token_url, client_id, scopes }` for any server. The generic provider handles PKCE + callback + token persistence. No code change needed for new MCP servers that use standard OAuth 2.1. + +--- + +## 7. Memory Impact Analysis + +Included because the sidecar alternative (§4.1 B) was motivated by memory. + +`openab-agent` baseline is 15-40 MB RSS. `rmcp` with the §5.5 feature set adds +1-2 MB binary delta and +0 MB idle RSS (no servers configured). Once servers connect, child processes dominate: Go ~10-20 MB, Rust ~5-10 MB, Python/Node ~30-80 MB each. + +| Aspect | A. Naïve flat | B. 
Sidecar | **C. In-core + meta-tool** | +|---|---|---|---| +| Idle RAM delta | +1-2 MB | +0 MB | +1-2 MB | +| Per-server RAM | +5-80 MB (child) | +15 MB bridge + 5-80 MB | +5-80 MB | +| System prompt tokens (total) | ~17,000 | ~600 (if sidecar discloses lazily) | ~600 | +| Lifecycle complexity | Medium | High (2 procs, IPC, version skew) | Medium | +| Crash blast radius | Bad server kills loop | Bridge crash = all gone | Bad server isolated | + +The 1-2 MB sidecar saving is dominated by per-server child RAM (identical across architectures) and by token cost (identical *as long as progressive disclosure is used*). Memory does not justify the sidecar. + +--- + +## 8. CLI Surface + +``` +openab-agent mcp list — show configured servers + status +openab-agent mcp status [server] — health, last error, OAuth state +openab-agent mcp add <name> <command> [args...] — append a stdio server to config +openab-agent mcp add <name> --url <url> — append an http server +openab-agent mcp remove <name> — remove a server from config +openab-agent mcp login <server> — run OAuth flow for a server +openab-agent mcp refresh <server> — force-refresh OAuth token +openab-agent mcp test <server> <tool> [json] — invoke a tool from CLI (debug) +openab-agent mcp doctor — diagnose config, network, auth +``` + +Subcommand placement under existing `openab-agent` binary — no new binary. CLI is a thin wrapper over `McpRuntimeManager` to keep the same code path validated by both LLM-driven and human-driven flows. + +--- + +## 9. Rollout Plan + +~6 weeks across three phases: + +1. **Foundation (3w)** — `rmcp` + stdio + meta-tool + minimal CLI, behind `--features mcp` +2. **Network & auth (2w)** — Streamable HTTP transport + OAuth providers + `login`/`refresh` CLI; promote flag default-on +3. **Resilience (1w)** — circuit breaker + `doctor` CLI; remove flag + +Week-by-week task breakdown lives on the tracking issue (filed at PR open). + +--- + +## 10. Open Questions + +1. 
**Should `mcp.json` live in the agent or the broker?** Agent owns its own config today; broker's `[agent].inherit_cloud_mcp_servers` (issue #753) is a separate concern. Proposal: agent reads `mcp.json` directly; broker can layer additional servers via env or a Kubernetes ConfigMap. **Owner**: needs broker-team alignment. +2. **Native-agent feature parity with broker-forward path.** PRs #329/#330/#345/#903 attempted broker-side MCP forwarding to backing CLIs. With native MCP in openab-agent, do we deprecate that path, keep it for non-native CLIs, or unify? Proposal: native agent uses its own MCP runtime; broker continues to forward MCP config to the non-native backing CLIs (e.g. Cursor, Copilot), which handle MCP themselves. **Owner**: broker-team. + +Resolved at design time (recorded on the tracking issue; no longer open): tool-naming separator (single underscore `_`, matching the Hermes §3.1 / opencode §3.2 convention), `session/load` re-enumeration (process-local state, re-read), per-tool permission gates (post-Phase-3 opt-in flag), `resources`/`prompts` capabilities (v2). + +--- + +## 11. 
References + +### Internal + +- `docs/adr/openab-agent.md` — agent charter, design principles cited in §4.2 +- `docs/adr/pr-contribution-guidelines.md` — prior-art requirements followed in §3 +- `openab-agent/src/skills.rs` (PR #955) — extension-pattern precedent cited in §4.3 +- `openab-agent/src/auth.rs` — TokenStore reused in §6.1 +- PRs #329, #330, #345, #903 — closed broker-forward attempts, §1.3 +- Issue #753 — broker-side MCP opt-out (out of scope) +- PR #951 — SessionPool persisted-mapping fix (informs §10 resolved-at-design-time list) + +### External — projects + +- Hermes Agent: https://github.com/NousResearch/hermes-agent +- opencode: https://github.com/anomalyco/opencode (formerly https://github.com/sst/opencode) +- pi-mcp-adapter: https://github.com/nicobailon/pi-mcp-adapter +- Goose: https://github.com/aaif-goose/goose (formerly https://github.com/block/goose) +- OpenHands: https://github.com/OpenHands/OpenHands +- rmcp: https://github.com/modelcontextprotocol/rust-sdk +- OpenClaw (evaluated per `pr-contribution-guidelines.md`, scope not applicable — see §3; canonical repo URL not publicly resolvable, internal reference via avasdream blog cited in guidelines) + +### External — specs & research + +- MCP spec: https://modelcontextprotocol.io +- MCP spec changelog 2025-11-25 (Streamable HTTP supersedes HTTP+SSE): https://modelcontextprotocol.io/specification/2025-11-25/basic/transports +- MCP SEP-1576 — Mitigating Token Bloat in MCP: https://github.com/modelcontextprotocol/modelcontextprotocol/issues/1576 +- Atlassian Rovo MCP SSE→Streamable HTTP migration notice (sunset 2026-06-30): https://community.atlassian.com/forums/Rovo-articles/Migrating-from-Atlassian-s-MCP-Server-SSE-to-Streamable-HTTP/ba-p/3092878 +- Figma MCP server (Streamable HTTP): https://help.figma.com/hc/en-us/articles/32132100833559-Guide-to-the-Dev-Mode-MCP-Server +- Anthropic — Equipping agents for the real world with Agent Skills: 
https://www.anthropic.com/engineering/equipping-agents-for-the-real-world-with-agent-skills +- Anthropic — Code execution with MCP: https://www.anthropic.com/engineering/code-execution-with-mcp +- Simon Willison — Claude Skills (2025-10-16): https://simonwillison.net/2025/Oct/16/claude-skills/ +- StackOne — MCP Token Optimization: https://www.stackone.com/blog/mcp-token-optimization/ +- opencode issues cited in §3.2, §4.1, §7: #11868, #7261, #13041 From fdd8738c80905440ea5a6c4601aaaa12074e90a7 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 16:38:41 +0000 Subject: [PATCH 02/54] docs(adr): unify MCP OAuth flows, add device-code preference and Fargate guardrails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - §6.4 flow selection: device-code preferred (matches existing CLI convention), paste-back universal fallback, browser laptop opt-in - §5.2 add login/complete_login meta-tool actions; login returns flow-tagged union - §6.1 spell out TokenStore persistence assumption + cold-start refresh - §6.3 device_authorization_endpoint extension point + RFC 8414 discovery - §3.8 stdio container-image caveat (interpreter required for Node/Python) - §5.7 max_concurrent_servers knob (default 10, see §7 for constrained tuning) - §7 Fargate 512MB/1GB OOM analysis + mitigations Co-Authored-By: Claude Opus 4.7 --- docs/adr/openab-agent-mcp.md | 64 ++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/docs/adr/openab-agent-mcp.md b/docs/adr/openab-agent-mcp.md index bd078f604..f0d8848c2 100644 --- a/docs/adr/openab-agent-mcp.md +++ b/docs/adr/openab-agent-mcp.md @@ -208,6 +208,8 @@ Anthropic reference + community packages. 
All ship as `command + args`; no netwo | `mcp-server-time` | Rust | `mcp-server-time` (cargo) | | `mcp-server-gitlab` | Node | `@modelcontextprotocol/server-gitlab` (npm) | +**Container-image caveat for headless deployments**: Node/Python stdio servers require the corresponding interpreter (`node`, `python3`, `uvx`, `npx`) in the image. The openab base image ships none. Operators running openab-agent in headless environments (Fargate, Kubernetes pods, CI) must either bake the interpreter into a derived image or limit `mcpServers` to Go/Rust binaries (column above). A misconfigured server fails in isolation per §5.9. + #### Vendor-hosted SaaS servers — all Streamable HTTP Survey of mainstream public endpoints (2026-Q2). Every active vendor endpoint surveyed is Streamable HTTP. The Atlassian SSE URL is the lone holdout and has a published sunset date. @@ -302,11 +304,13 @@ openab-agent/src/tools.rs::tool_definitions() returns 5 entries: "action": { "type": "string", "enum": ["help", "list_servers", "list_tools", - "describe_tool", "call", "status"] + "describe_tool", "call", "status", + "login", "complete_login"] }, - "server": { "type": "string" }, - "tool": { "type": "string" }, - "arguments": { "type": "object" } + "server": { "type": "string" }, + "tool": { "type": "string" }, + "arguments": { "type": "object" }, + "redirect_url": { "type": "string" } }, "required": ["action"] } @@ -323,6 +327,8 @@ Per-action contract: | `describe_tool` | `server`, `tool` | `{ name, description, input_schema }` | | `call` | `server`, `tool`, `arguments` | tool's `CallToolResult` | | `status` | `server?` | per-server health / last error / OAuth state | +| `login` | `server` | `{ flow: "device", user_code, verification_url, ... }` or `{ flow: "paste", authorize_url, state, ... 
}` — see §6.4 | +| `complete_login` | `server`, `redirect_url` | `{ ok: true }` or `{ error }` — paste flow only; device flow polls internally | ### 5.3 Agent loop interaction @@ -412,9 +418,10 @@ Single root key `mcpServers` to match Claude Code / Codex / Cursor / Cline conve ┌─────────────────────────────────────┐ │ McpRuntimeManager (1 per agent) │ │ │ - │ config: Arc │ - │ servers: Map │ - │ idle_ttl: Duration (default 10m) │ + │ config: Arc │ + │ servers: Map │ + │ idle_ttl: Duration (10m) │ + │ max_concurrent: usize (10) │ └─────────────────────────────────────┘ │ │ on first call needing server X: @@ -440,7 +447,7 @@ Single root key `mcpServers` to match Claude Code / Codex / Cursor / Cline conve - **Lazy connect**: server is `Disconnected` at boot; transitions to `Connecting → Connected` on first action needing it - **Idle eviction**: background task evicts servers idle > `idle_ttl` (default 10m, configurable per server). State drops to `Disconnected`; tools cache retained for fast re-connect -- **No per-thread isolation**: agent is single-thread-per-session; openab broker handles thread-level concurrency upstream +- **Concurrency cap**: `max_concurrent_servers` bounds simultaneously-`Connected` servers (default 10; see §7 for constrained-env tuning). When at cap, the LRU connected server is force-evicted before connecting a new one - **Connection reuse**: while connected, all `mcp call` actions reuse the same `Peer` ### 5.8 Config refresh model @@ -498,6 +505,10 @@ While `Open`, `mcp call` returns `{"error":"server unavailable, cooldown 45s rem `openab-agent/src/auth.rs` already implements hand-rolled PKCE for Codex (`CODEX_AUTHORIZE_URL`, port 1455). The TokenStore (`~/.openab/agent/auth.json`, 0o600) is reused — `mcp/oauth.rs` calls into the same store with namespaced keys (`mcp:` vs `codex`). +**Persistence assumption**: TokenStore is treated as persistent state. 
Deployments must mount `~/.openab/` on durable storage — hostPath / PVC (k8s work-agents), volume + S3 sync (Fargate Mira), or developer-laptop home directory. Ephemeral container filesystems force a re-bootstrap on every restart and are not a supported configuration. + +**Cold-start refresh**: on process start the runtime reads TokenStore lazily (on first `mcp call` per server). Expired access tokens trigger an in-process refresh via the stored refresh token; success updates the store and proceeds transparently. Refresh failure (revoked / expired refresh token) flips the server's state to `NeedsAuth` (§5.7); the next `mcp call` returns an error that prompts the LLM to re-run the §6.4 login flow. No human interaction is required as long as the refresh token remains valid. + ### 6.2 Built-in providers (Phase 2) | Provider | Auth URL | Token URL | Callback | Scopes | @@ -506,9 +517,40 @@ While `Open`, `mcp call` returns `{"error":"server unavailable, cooldown 45s rem | `github-copilot` | (existing pi/anthropic flow) | existing | existing | existing | | `generic` | from `mcpServers[name].oauth.authorize_url` | from `.oauth.token_url` | dynamically allocated port | from `.oauth.scopes` | +Callback values apply when the browser flow is engaged (`--browser` / `$DISPLAY` set), and when the agent-guided paste-back branch of §6.4 is selected (user copies the redirect URL from the browser URL bar). The device-code branch of §6.4 ignores the callback entirely. + ### 6.3 Custom provider extension point -Config can declare `oauth: { authorize_url, token_url, client_id, scopes }` for any server. The generic provider handles PKCE + callback + token persistence. No code change needed for new MCP servers that use standard OAuth 2.1. +Config can declare `oauth: { authorize_url, token_url, client_id, scopes, device_authorization_endpoint? }` for any server. The generic provider handles PKCE + callback + token persistence. 
No code change needed for new MCP servers that use standard OAuth 2.1. If `device_authorization_endpoint` is set (or RFC 8414 `/.well-known/oauth-authorization-server` advertises one), §6.4 device-code flow is preferred over paste-back. + +### 6.4 Agent-guided OAuth flow (default) + +openab-agent's primary deployment surface is containerized (k8s pods, Fargate tasks) where `localhost:53692/callback` is unreachable and there is no display to open. Two non-browser flows are supported; the runtime picks per server based on capability. Browser-callback remains a laptop-only opt-in (`$DISPLAY` set, or `--browser` passed to `openab-agent mcp login`). + +**Selection logic** (on `mcp(action: "login", server: X)`): + +1. If `X` declares an `oauth.device_authorization_endpoint` in config (§6.3) — or if RFC 8414 discovery against the server's authorize URL advertises one — runtime uses **device-code flow** (RFC 8628). Matches openab's existing CLI convention (`claude auth login`, `codex --device-auth`, `grok --device-auth`). +2. Else runtime uses **paste-back flow** (standard auth-code + PKCE). Universal fallback for OAuth 2.1 servers without a device endpoint (Linear, Notion, Figma, Sentry, ...). + +**Device-code flow** (typically platform OAuth: Anthropic, OpenAI, xAI): + +- `login` returns `{ flow: "device", user_code, verification_url, expires_in }`. Agent relays to chat: "Open `https://example.com/device`, enter code: `ABCD-EFGH`". +- Runtime polls the token endpoint in background (5s interval, RFC 8628 §3.5). On success, persists tokens under `mcp:X`, transitions server to `Connected`. +- LLM checks `mcp(action: "status", server: X)` to learn when ready; `complete_login` not required for this branch. + +**Paste-back flow** (typically MCP SaaS: Linear, Notion, Figma, ...): + +- `login` returns `{ flow: "paste", authorize_url, state }`. Runtime persists transient `{verifier, state}` in TokenStore. 
Agent relays to chat: "Open this link, sign in, paste the URL you land on back here". +- User pastes the URL as next chat message; LLM calls `mcp(action: "complete_login", server: X, redirect_url: "...")`. +- Runtime parses `code` + `state`, validates `state`, performs PKCE token exchange against `token_url`, persists tokens under `mcp:X`, drops transient state. + +**Security** (both flows): + +- Device-code `user_code` is short-lived (RFC 8628 §3.2, typically ≤10 min); an attacker who sees the code in chat must also race the polling loop and prove device ownership. +- Paste-back redirect URL carries only the authorization code (OAuth 2.1 PKCE; implicit/hybrid removed); code is single-use + ≤10 min; PKCE verifier held in-process makes intercepted codes unusable. +- Token exchange happens entirely inside the agent process; the chat channel never carries access or refresh tokens. Refresh rotation runs in-process per §6.1. + +`openab-agent/src/auth.rs` already ships all three paths for Codex OAuth (browser L150-244, paste-back L165-201, device L328-440). This ADR generalizes that pattern across MCP servers and centralizes flow selection on per-server capability rather than per-CLI hard-coding. OpenHands notes the same headless-OAuth incompatibility (§3.5) without shipping a fix. --- @@ -528,6 +570,8 @@ Included because the sidecar alternative (§4.1 B) was motivated by memory. The 1-2 MB sidecar saving is dominated by per-server child RAM (identical across architectures) and by token cost (identical *as long as progressive disclosure is used*). Memory does not justify the sidecar. +**Constrained-environment note (Fargate / small Kubernetes pods).** Fargate Spot tasks at 512 MB / 1 GB have no swap; OOMKill is hard. Worst-case stack — agent baseline 40 MB + 5 Node/Python stdio servers at 80 MB each + LLM context buffers — sums to ~440-540 MB, which trips a 512 MB task before any prompt processing. 
Two mitigations: (a) lower `max_concurrent_servers` to 3 in `mcp.json` (§5.7), bounding worst case to ~280 MB; (b) prefer Go/Rust stdio servers (5-20 MB) or HTTP servers (0 MB local) over Node/Python interpreters. The `mcp doctor` CLI (§8) flags configurations whose worst-case sum exceeds the cgroup limit. + --- ## 8. CLI Surface @@ -538,7 +582,7 @@ openab-agent mcp status [server] — health, last error, OAuth state openab-agent mcp add — append a stdio server to config openab-agent mcp add --url — append an http server openab-agent mcp remove — remove a server from config -openab-agent mcp login — run OAuth flow for a server +openab-agent mcp login [--browser] — run OAuth flow (see §6.4; --browser opts into localhost callback) openab-agent mcp refresh — force-refresh OAuth token openab-agent mcp test [json] — invoke a tool from CLI (debug) openab-agent mcp doctor — diagnose config, network, auth From 47363a27e6e2574f918f90540f9b35b2a8e98532 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 16:57:35 +0000 Subject: [PATCH 03/54] =?UTF-8?q?docs(adr):=20harden=20MCP=20OAuth=20?= =?UTF-8?q?=E2=80=94=20opt-in=20discovery=20+=20force-flush=20write=20cont?= =?UTF-8?q?ract?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - §6.4 RFC 8414 discovery disabled by default; opt-in requires oauth.discovery=true + oauth.discovery_allowlist (boot rejects otherwise). Rationale: awsvpc egress + SSRF surface in multi-tenant deployments. - §6.1 add RTR race warning: async persistence layers (Fargate S3 sync, eventually-consistent volumes) must flush new tokens to durable storage before Spot interruption, else cascade-revoke locks the user out. Contract: fsync(2) agent-side + mtime-event-driven sync deployment-side. - §6.3 expose oauth.discovery / oauth.discovery_allowlist on custom providers. 
Co-Authored-By: Claude Opus 4.7 --- docs/adr/openab-agent-mcp.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/adr/openab-agent-mcp.md b/docs/adr/openab-agent-mcp.md index f0d8848c2..fc4c895b3 100644 --- a/docs/adr/openab-agent-mcp.md +++ b/docs/adr/openab-agent-mcp.md @@ -509,6 +509,12 @@ While `Open`, `mcp call` returns `{"error":"server unavailable, cooldown 45s rem **Cold-start refresh**: on process start the runtime reads TokenStore lazily (on first `mcp call` per server). Expired access tokens trigger an in-process refresh via the stored refresh token; success updates the store and proceeds transparently. Refresh failure (revoked / expired refresh token) flips the server's state to `NeedsAuth` (§5.7); the next `mcp call` returns an error that prompts the LLM to re-run the §6.4 login flow. No human interaction is required as long as the refresh token remains valid. +**Refresh-token rotation race with async persistence layers**: OAuth 2.1 servers issue a new refresh token on every rotation and immediately revoke the previous one; reuse of a revoked refresh token is treated as a replay attack and cascade-revokes the entire token chain. Deployments where TokenStore persistence is asynchronous (Fargate S3 sidecar sync, eventually-consistent volumes) must flush new tokens to durable storage *before* the agent can be killed — otherwise a Spot interruption between local write and remote sync restores the revoked token from S3 on the next task and locks the user out. Contract: + +- **Agent side**: `TokenStore` calls `fsync(2)` after every write to `auth.json` +- **Deployment side**: the S3 / volume sync layer must trigger on `auth.json` mtime change (`inotify` / `fsnotify` event), not poll on a cron. 
Cron-driven sync (≥1 min interval) is incompatible with refresh-token rotation under Spot interruption +- **Reference deployment**: Mira (openab-ecs Fargate Spot) `mira-home/` S3 sync configuration + ### 6.2 Built-in providers (Phase 2) | Provider | Auth URL | Token URL | Callback | Scopes | @@ -521,7 +527,7 @@ Callback values apply when the browser flow is engaged (`--browser` / `$DISPLAY` ### 6.3 Custom provider extension point -Config can declare `oauth: { authorize_url, token_url, client_id, scopes, device_authorization_endpoint? }` for any server. The generic provider handles PKCE + callback + token persistence. No code change needed for new MCP servers that use standard OAuth 2.1. If `device_authorization_endpoint` is set (or RFC 8414 `/.well-known/oauth-authorization-server` advertises one), §6.4 device-code flow is preferred over paste-back. +Config can declare `oauth: { authorize_url, token_url, client_id, scopes, device_authorization_endpoint?, discovery?, discovery_allowlist? }` for any server. The generic provider handles PKCE + callback + token persistence. No code change needed for new MCP servers that use standard OAuth 2.1. If `device_authorization_endpoint` is set, §6.4 device-code flow is preferred over paste-back. RFC 8414 dynamic discovery is opt-in only and requires an allowlist — see §6.4. ### 6.4 Agent-guided OAuth flow (default) @@ -529,9 +535,11 @@ openab-agent's primary deployment surface is containerized (k8s pods, Fargate ta **Selection logic** (on `mcp(action: "login", server: X)`): -1. If `X` declares an `oauth.device_authorization_endpoint` in config (§6.3) — or if RFC 8414 discovery against the server's authorize URL advertises one — runtime uses **device-code flow** (RFC 8628). Matches openab's existing CLI convention (`claude auth login`, `codex --device-auth`, `grok --device-auth`). +1. If `X` declares an `oauth.device_authorization_endpoint` in config (§6.3), runtime uses **device-code flow** (RFC 8628). 
Matches openab's existing CLI convention (`claude auth login`, `codex --device-auth`, `grok --device-auth`). 2. Else runtime uses **paste-back flow** (standard auth-code + PKCE). Universal fallback for OAuth 2.1 servers without a device endpoint (Linear, Notion, Figma, Sentry, ...). +RFC 8414 dynamic discovery (`/.well-known/oauth-authorization-server`) is **disabled by default**. Operators opt in per-server via `oauth.discovery: true` plus an explicit `oauth.discovery_allowlist` of permitted domains (e.g. `["*.anthropic.com"]`); boot rejects `discovery: true` without an allowlist. Rationale: awsvpc egress restrictions + SSRF surface in multi-tenant deployments. + **Device-code flow** (typically platform OAuth: Anthropic, OpenAI, xAI): - `login` returns `{ flow: "device", user_code, verification_url, expires_in }`. Agent relays to chat: "Open `https://example.com/device`, enter code: `ABCD-EFGH`". From 32176fc1c3cce9ae5e4c999789b343da79bfe217 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 17:35:38 +0000 Subject: [PATCH 04/54] feat(openab-agent/mcp): scaffold MCP module + mcpServers config loader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 foundation slice per ADR §5.4 + §5.6: - Add optional `rmcp 1.7` dep + `mcp` feature flag (default off) - Wire `#[cfg(feature = "mcp")] mod mcp;` into main - New `mcp/config.rs`: `McpConfig` / `ServerConfig` (Stdio | Http) / `ToolFilter` / `OAuthConfig`, global+project layered load, project precedence on name collision, `${env:VAR}` interpolation with per-server error context OAuth fields limited to `provider` + `scopes`; custom-provider endpoints (§6.3) deferred to Phase 2 auth slice to avoid dead schema. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/Cargo.toml | 10 ++ openab-agent/src/main.rs | 2 + openab-agent/src/mcp/config.rs | 261 +++++++++++++++++++++++++++++++++ openab-agent/src/mcp/mod.rs | 3 + 4 files changed, 276 insertions(+) create mode 100644 openab-agent/src/mcp/config.rs create mode 100644 openab-agent/src/mcp/mod.rs diff --git a/openab-agent/Cargo.toml b/openab-agent/Cargo.toml index f059cfc6a..72edda354 100644 --- a/openab-agent/Cargo.toml +++ b/openab-agent/Cargo.toml @@ -21,9 +21,19 @@ getrandom = "0.4.2" urlencoding = "2.1.3" open = "5.3.5" url = "2.5.8" +rmcp = { version = "1.7", default-features = false, optional = true, features = [ + "client", + "transport-child-process", + "transport-streamable-http-client-reqwest", + "auth", +] } [target.'cfg(unix)'.dependencies] libc = "0.2" +[features] +default = [] +mcp = ["dep:rmcp"] + [dev-dependencies] tempfile = "3" diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index a37693079..f5c47f2e8 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -2,6 +2,8 @@ mod acp; mod agent; mod auth; mod llm; +#[cfg(feature = "mcp")] +mod mcp; mod skills; mod tools; diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs new file mode 100644 index 000000000..cba5f8fd1 --- /dev/null +++ b/openab-agent/src/mcp/config.rs @@ -0,0 +1,261 @@ +//! `mcpServers` config schema + loader. See ADR §5.6. +//! +//! Loaded from `.openab/agent/mcp.json` (project) and `~/.openab/agent/mcp.json` +//! (global), project entries take precedence on name collision. 
+ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result, anyhow}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +pub struct McpConfig { + #[serde(rename = "mcpServers", default)] + pub servers: HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ServerConfig { + Stdio { + command: String, + #[serde(default)] + args: Vec, + #[serde(default)] + env: HashMap, + #[serde(default, rename = "tool_filter")] + tool_filter: Option, + }, + Http { + url: String, + #[serde(default)] + oauth: Option, + #[serde(default, rename = "tool_filter")] + tool_filter: Option, + }, +} + +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +pub struct ToolFilter { + #[serde(default)] + pub include: Vec, + #[serde(default)] + pub exclude: Vec, +} + +/// OAuth block. Phase 1 only parses `provider` + `scopes`; custom-provider +/// fields (§6.3: `authorize_url`, `token_url`, `device_authorization_endpoint`, +/// `discovery`, `discovery_allowlist`) land with the Phase 2 auth slice. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OAuthConfig { + #[serde(default)] + pub provider: Option, + #[serde(default)] + pub scopes: Vec, +} + +impl McpConfig { + /// Load + merge global and project configs from the standard locations. + /// Missing files are treated as empty. + pub fn load() -> Result { + let global = home_dir().map(|h| h.join(".openab/agent/mcp.json")); + let project = std::env::current_dir() + .ok() + .map(|c| c.join(".openab/agent/mcp.json")); + Self::load_layered(global.as_deref(), project.as_deref()) + } + + /// Load + merge two layers; project wins on name collision. 
+ pub fn load_layered(global: Option<&Path>, project: Option<&Path>) -> Result { + let mut merged = Self::default(); + for path in [global, project].into_iter().flatten() { + if !path.exists() { + continue; + } + let layer = Self::load_file(path)?; + merged.servers.extend(layer.servers); + } + Ok(merged) + } + + fn load_file(path: &Path) -> Result { + let raw = std::fs::read_to_string(path) + .with_context(|| format!("read mcp config {}", path.display()))?; + serde_json::from_str(&raw) + .with_context(|| format!("parse mcp config {}", path.display())) + } +} + +impl ServerConfig { + /// Return a copy with `${env:VAR}` placeholders resolved against the + /// process environment. Missing env vars are an error for that server; + /// callers should skip the server and continue (ADR §5.6 "per-server + /// failure isolated"). `name` is the server name used in error context. + pub fn resolved(&self, name: &str) -> Result { + let json = serde_json::to_value(self)?; + let resolved = interpolate_value(json, &std::env::vars().collect()) + .with_context(|| format!("resolve env for mcp server {name:?}"))?; + Ok(serde_json::from_value(resolved)?) + } +} + +fn interpolate_value( + value: serde_json::Value, + env: &HashMap, +) -> Result { + use serde_json::Value; + match value { + Value::String(s) => Ok(Value::String(interpolate_env(&s, env)?)), + Value::Array(items) => items + .into_iter() + .map(|v| interpolate_value(v, env)) + .collect::>>() + .map(Value::Array), + Value::Object(map) => map + .into_iter() + .map(|(k, v)| interpolate_value(v, env).map(|v| (k, v))) + .collect::>>() + .map(Value::Object), + other => Ok(other), + } +} + +/// Replace `${env:VAR}` tokens in `input` with the matching env value. +/// Missing variables produce an error naming the offender. 
+pub fn interpolate_env(input: &str, env: &HashMap) -> Result { + let mut out = String::with_capacity(input.len()); + let mut rest = input; + while let Some(start) = rest.find("${env:") { + out.push_str(&rest[..start]); + let after = &rest[start + "${env:".len()..]; + let end = after + .find('}') + .ok_or_else(|| anyhow!("unterminated ${{env:..}} in {input:?}"))?; + let var = &after[..end]; + let val = env + .get(var) + .ok_or_else(|| anyhow!("env var ${var} not set (referenced by mcp config)"))?; + out.push_str(val); + rest = &after[end + 1..]; + } + out.push_str(rest); + Ok(out) +} + +fn home_dir() -> Option { + std::env::var_os("HOME").map(PathBuf::from) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn env(pairs: &[(&str, &str)]) -> HashMap { + pairs.iter().map(|(k, v)| (k.to_string(), v.to_string())).collect() + } + + #[test] + fn interpolate_replaces_tokens() { + let e = env(&[("FOO", "bar"), ("X", "y")]); + assert_eq!(interpolate_env("a${env:FOO}b${env:X}", &e).unwrap(), "abarby"); + } + + #[test] + fn interpolate_passes_through_plain_strings() { + let e = env(&[]); + assert_eq!(interpolate_env("plain", &e).unwrap(), "plain"); + } + + #[test] + fn interpolate_errors_on_missing_var() { + let e = env(&[]); + let err = interpolate_env("${env:MISSING}", &e).unwrap_err().to_string(); + assert!(err.contains("MISSING"), "expected MISSING in error: {err}"); + } + + #[test] + fn interpolate_errors_on_unterminated() { + let e = env(&[("FOO", "bar")]); + assert!(interpolate_env("${env:FOO", &e).is_err()); + } + + #[test] + fn parses_stdio_and_http_servers() { + let json = r#"{ + "mcpServers": { + "fs": { + "type": "stdio", + "command": "mcp-server-filesystem", + "args": ["/workspace"], + "tool_filter": { "include": ["read_*"] } + }, + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear" } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + assert_eq!(cfg.servers.len(), 2); + match 
cfg.servers.get("fs").unwrap() { + ServerConfig::Stdio { command, args, tool_filter, .. } => { + assert_eq!(command, "mcp-server-filesystem"); + assert_eq!(args, &vec!["/workspace".to_string()]); + assert_eq!(tool_filter.as_ref().unwrap().include, vec!["read_*"]); + } + _ => panic!("expected stdio"), + } + match cfg.servers.get("linear").unwrap() { + ServerConfig::Http { url, oauth, .. } => { + assert_eq!(url, "https://mcp.linear.app/mcp"); + assert_eq!(oauth.as_ref().unwrap().provider.as_deref(), Some("linear")); + } + _ => panic!("expected http"), + } + } + + #[test] + fn resolved_substitutes_env_in_args() { + // SAFETY: single-threaded test; isolated env key. + unsafe { std::env::set_var("MCP_TEST_TOKEN", "secret123"); } + let cfg = ServerConfig::Stdio { + command: "github-mcp-server".into(), + args: vec!["--token".into(), "${env:MCP_TEST_TOKEN}".into()], + env: HashMap::new(), + tool_filter: None, + }; + match cfg.resolved("github").unwrap() { + ServerConfig::Stdio { args, .. } => { + assert_eq!(args[1], "secret123"); + } + _ => unreachable!(), + } + } + + #[test] + fn merge_project_wins() { + let dir = tempfile::tempdir().unwrap(); + let global = dir.path().join("global.json"); + let project = dir.path().join("project.json"); + std::fs::write( + &global, + r#"{"mcpServers":{"fs":{"type":"stdio","command":"global-fs"},"x":{"type":"stdio","command":"global-x"}}}"#, + ).unwrap(); + std::fs::write( + &project, + r#"{"mcpServers":{"fs":{"type":"stdio","command":"project-fs"}}}"#, + ).unwrap(); + let cfg = McpConfig::load_layered(Some(&global), Some(&project)).unwrap(); + assert_eq!(cfg.servers.len(), 2); + match cfg.servers.get("fs").unwrap() { + ServerConfig::Stdio { command, .. } => assert_eq!(command, "project-fs"), + _ => unreachable!(), + } + match cfg.servers.get("x").unwrap() { + ServerConfig::Stdio { command, .. 
} => assert_eq!(command, "global-x"), + _ => unreachable!(), + } + } +} diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs new file mode 100644 index 000000000..5d487979d --- /dev/null +++ b/openab-agent/src/mcp/mod.rs @@ -0,0 +1,3 @@ +//! Native MCP client. See `docs/adr/openab-agent-mcp.md`. + +pub mod config; From 3d588654276de440d63c64e40f85bb0d601328e4 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 17:42:18 +0000 Subject: [PATCH 05/54] style(openab-agent/mcp): apply cargo fmt to config.rs CI fmt-check found 8 formatting deltas in the Phase 1 scaffold. No logic change. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/config.rs | 36 ++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index cba5f8fd1..892699afa 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -6,7 +6,7 @@ use std::collections::HashMap; use std::path::{Path, PathBuf}; -use anyhow::{Context, Result, anyhow}; +use anyhow::{anyhow, Context, Result}; use serde::{Deserialize, Serialize}; #[derive(Debug, Default, Clone, Serialize, Deserialize)] @@ -82,8 +82,7 @@ impl McpConfig { fn load_file(path: &Path) -> Result { let raw = std::fs::read_to_string(path) .with_context(|| format!("read mcp config {}", path.display()))?; - serde_json::from_str(&raw) - .with_context(|| format!("parse mcp config {}", path.display())) + serde_json::from_str(&raw).with_context(|| format!("parse mcp config {}", path.display())) } } @@ -152,13 +151,19 @@ mod tests { use super::*; fn env(pairs: &[(&str, &str)]) -> HashMap { - pairs.iter().map(|(k, v)| (k.to_string(), v.to_string())).collect() + pairs + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect() } #[test] fn interpolate_replaces_tokens() { let e = env(&[("FOO", "bar"), ("X", "y")]); - assert_eq!(interpolate_env("a${env:FOO}b${env:X}", &e).unwrap(), "abarby"); + 
assert_eq!( + interpolate_env("a${env:FOO}b${env:X}", &e).unwrap(), + "abarby" + ); } #[test] @@ -170,7 +175,9 @@ mod tests { #[test] fn interpolate_errors_on_missing_var() { let e = env(&[]); - let err = interpolate_env("${env:MISSING}", &e).unwrap_err().to_string(); + let err = interpolate_env("${env:MISSING}", &e) + .unwrap_err() + .to_string(); assert!(err.contains("MISSING"), "expected MISSING in error: {err}"); } @@ -200,7 +207,12 @@ mod tests { let cfg: McpConfig = serde_json::from_str(json).unwrap(); assert_eq!(cfg.servers.len(), 2); match cfg.servers.get("fs").unwrap() { - ServerConfig::Stdio { command, args, tool_filter, .. } => { + ServerConfig::Stdio { + command, + args, + tool_filter, + .. + } => { assert_eq!(command, "mcp-server-filesystem"); assert_eq!(args, &vec!["/workspace".to_string()]); assert_eq!(tool_filter.as_ref().unwrap().include, vec!["read_*"]); @@ -219,7 +231,9 @@ mod tests { #[test] fn resolved_substitutes_env_in_args() { // SAFETY: single-threaded test; isolated env key. 
- unsafe { std::env::set_var("MCP_TEST_TOKEN", "secret123"); } + unsafe { + std::env::set_var("MCP_TEST_TOKEN", "secret123"); + } let cfg = ServerConfig::Stdio { command: "github-mcp-server".into(), args: vec!["--token".into(), "${env:MCP_TEST_TOKEN}".into()], @@ -242,11 +256,13 @@ mod tests { std::fs::write( &global, r#"{"mcpServers":{"fs":{"type":"stdio","command":"global-fs"},"x":{"type":"stdio","command":"global-x"}}}"#, - ).unwrap(); + ) + .unwrap(); std::fs::write( &project, r#"{"mcpServers":{"fs":{"type":"stdio","command":"project-fs"}}}"#, - ).unwrap(); + ) + .unwrap(); let cfg = McpConfig::load_layered(Some(&global), Some(&project)).unwrap(); assert_eq!(cfg.servers.len(), 2); match cfg.servers.get("fs").unwrap() { From b70915937e7b9ff032290879e7529da1abfd79ef Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 17:52:54 +0000 Subject: [PATCH 06/54] ci(openab-agent): exercise --features mcp in clippy + test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI previously only built the default feature set, leaving the Phase 1 MCP scaffold (mcp/config.rs, gated by --features mcp) without compile, clippy, or test coverage. Adds explicit --features mcp invocations and watches workflow file changes so this gap is closed for the rest of the rollout (ADR §9). 
Co-Authored-By: Claude Opus 4.7 --- .github/workflows/ci-openab-agent.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci-openab-agent.yml b/.github/workflows/ci-openab-agent.yml index c0d5a3727..b33d8b613 100644 --- a/.github/workflows/ci-openab-agent.yml +++ b/.github/workflows/ci-openab-agent.yml @@ -4,9 +4,11 @@ on: push: paths: - 'openab-agent/**' + - '.github/workflows/ci-openab-agent.yml' pull_request: paths: - 'openab-agent/**' + - '.github/workflows/ci-openab-agent.yml' jobs: check: @@ -24,7 +26,9 @@ jobs: workspaces: openab-agent - run: cargo fmt --check - run: cargo clippy -- -D warnings + - run: cargo clippy --features mcp -- -D warnings - run: cargo test + - run: cargo test --features mcp - run: cargo test -- --ignored env: ANTHROPIC_API_KEY: "fake-key-for-ci" From 3067fecdcca5ff8a0167fac1c34f0da59245f0a1 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 18:03:12 +0000 Subject: [PATCH 07/54] feat(openab-agent/mcp): add 'mcp list' CLI subcommand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires up the Phase 1 config loader so clippy --features mcp can see it. McpConfig::load() + ServerConfig::resolved() + serde pretty-print are all reachable from main, clearing the dead-code denial of compile under -D warnings. Output groups successful servers (✓) and failures (✗ with reason), sorted by name for deterministic display. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/main.rs | 17 +++++++++++++++++ openab-agent/src/mcp/mod.rs | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index f5c47f2e8..9486b0a6f 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -24,6 +24,19 @@ enum Commands { #[command(subcommand)] provider: AuthProvider, }, + /// Inspect / manage configured MCP servers + #[cfg(feature = "mcp")] + Mcp { + #[command(subcommand)] + action: McpAction, + }, +} + +#[cfg(feature = "mcp")] +#[derive(Subcommand)] +enum McpAction { + /// List configured MCP servers (loads global + project mcp.json) + List, } #[derive(Subcommand)] @@ -72,5 +85,9 @@ async fn main() { auth::show_status(); } }, + #[cfg(feature = "mcp")] + Some(Commands::Mcp { action }) => match action { + McpAction::List => mcp::cli_list_servers(), + }, } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 5d487979d..4499a5492 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -1,3 +1,37 @@ //! Native MCP client. See `docs/adr/openab-agent-mcp.md`. pub mod config; + +use config::McpConfig; + +/// `openab-agent mcp list` — load global + project config, resolve env, print. 
+pub fn cli_list_servers() { + let cfg = match McpConfig::load() { + Ok(c) => c, + Err(e) => { + eprintln!("failed to load mcp config: {e:#}"); + std::process::exit(1); + } + }; + if cfg.servers.is_empty() { + println!("No MCP servers configured."); + println!(" global: ~/.openab/agent/mcp.json"); + println!(" project: ./.openab/agent/mcp.json"); + return; + } + let mut servers: Vec<_> = cfg.servers.iter().collect(); + servers.sort_by(|(a, _), (b, _)| a.cmp(b)); + for (name, server) in servers { + match server.resolved(name) { + Ok(resolved) => { + println!("✓ {name}"); + if let Ok(j) = serde_json::to_string_pretty(&resolved) { + for line in j.lines() { + println!(" {line}"); + } + } + } + Err(e) => println!("✗ {name}: {e:#}"), + } + } +} From 82c43e55b8dc34c98251035f085aaccf82fa0550 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 18:10:37 +0000 Subject: [PATCH 08/54] fix(openab-agent/mcp): satisfy clippy::unnecessary_sort_by sort_by_key over (name, _) is the cleaner form; no behavior change. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 4499a5492..139f574b1 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -20,7 +20,7 @@ pub fn cli_list_servers() { return; } let mut servers: Vec<_> = cfg.servers.iter().collect(); - servers.sort_by(|(a, _), (b, _)| a.cmp(b)); + servers.sort_by_key(|(name, _)| *name); for (name, server) in servers { match server.resolved(name) { Ok(resolved) => { From cceb42ed03afa6624e1dfa257d6e9faacaeee8ac Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 18:21:14 +0000 Subject: [PATCH 09/54] feat(openab-agent/mcp): redact secrets in 'mcp list' by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Mira's security review on the Phase 1 thread: the previous 'mcp list' eagerly called ServerConfig::resolved(), substituting ${env:GITHUB_TOKEN} etc. into the printed output. Three leak paths matter — pasting CLI output into bug reports / chat, screen sharing, and stdout log collection. New behavior: - Default: print raw config; ${env:VAR} placeholders kept verbatim. Safe to paste publicly; reader still sees which env var feeds each field. - --resolve opts into substitution and prints a two-line warning banner. Useful for diagnosing missing-env startup failures. No CLI-shape break: 'mcp list' still works; --resolve is additive.
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/main.rs | 10 +++++++-- openab-agent/src/mcp/mod.rs | 42 +++++++++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index 9486b0a6f..dfa01ac77 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -36,7 +36,13 @@ enum Commands { #[derive(Subcommand)] enum McpAction { /// List configured MCP servers (loads global + project mcp.json) - List, + List { + /// Substitute ${env:VAR} placeholders with real values. + /// WARNING: output will contain secrets if your config references + /// tokens via env vars — do not paste publicly. + #[arg(long)] + resolve: bool, + }, } #[derive(Subcommand)] @@ -87,7 +93,7 @@ async fn main() { }, #[cfg(feature = "mcp")] Some(Commands::Mcp { action }) => match action { - McpAction::List => mcp::cli_list_servers(), + McpAction::List { resolve } => mcp::cli_list_servers(resolve), }, } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 139f574b1..23245f60d 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -2,10 +2,15 @@ pub mod config; -use config::McpConfig; +use config::{McpConfig, ServerConfig}; -/// `openab-agent mcp list` — load global + project config, resolve env, print. -pub fn cli_list_servers() { +/// `openab-agent mcp list [--resolve]`. +/// +/// Default: print configs verbatim (`${env:VAR}` placeholders kept as-is) so +/// `mcp list` is safe to paste into bug reports. `--resolve` opts into +/// substituting env vars and prints a leading warning — useful for debugging +/// missing-env startup failures locally. 
+pub fn cli_list_servers(resolve: bool) { let cfg = match McpConfig::load() { Ok(c) => c, Err(e) => { @@ -19,19 +24,34 @@ pub fn cli_list_servers() { println!(" project: ./.openab/agent/mcp.json"); return; } + if resolve { + println!("⚠ --resolve: env vars substituted into output below."); + println!("⚠ Output may contain secrets — do not paste publicly."); + println!(); + } let mut servers: Vec<_> = cfg.servers.iter().collect(); servers.sort_by_key(|(name, _)| *name); for (name, server) in servers { + print_server(name, server, resolve); + } +} + +fn print_server(name: &str, server: &ServerConfig, resolve: bool) { + if resolve { match server.resolved(name) { - Ok(resolved) => { - println!("✓ {name}"); - if let Ok(j) = serde_json::to_string_pretty(&resolved) { - for line in j.lines() { - println!(" {line}"); - } - } - } + Ok(r) => print_json("✓", name, &r), Err(e) => println!("✗ {name}: {e:#}"), } + } else { + print_json("•", name, server); + } +} + +fn print_json(status: &str, name: &str, value: &T) { + println!("{status} {name}"); + if let Ok(json) = serde_json::to_string_pretty(value) { + for line in json.lines() { + println!(" {line}"); + } } } From 20a2448ad70d67c589f0798ad87cb5ae4c786556 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 18:30:20 +0000 Subject: [PATCH 10/54] fix(openab-agent/mcp): emit --resolve warnings on stderr println! puts the warning banner into stdout, which (a) gets swallowed by 'mcp list --resolve > dump.json' redirection so the user never sees the security notice, and (b) corrupts the JSON payload for downstream pipes like 'mcp list --resolve | jq'. Routing the banner through eprintln! keeps it visible regardless of redirection and keeps stdout pure JSON for piping. Standard Unix convention: data on stdout, diagnostics on stderr. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 23245f60d..f8bae4385 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -25,9 +25,9 @@ pub fn cli_list_servers(resolve: bool) { return; } if resolve { - println!("⚠ --resolve: env vars substituted into output below."); - println!("⚠ Output may contain secrets — do not paste publicly."); - println!(); + eprintln!("⚠ --resolve: env vars substituted into output below."); + eprintln!("⚠ Output may contain secrets — do not paste publicly."); + eprintln!(); } let mut servers: Vec<_> = cfg.servers.iter().collect(); servers.sort_by_key(|(name, _)| *name); From 7079c9063c0e524dcff5988a6f958959b0075527 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 18:47:32 +0000 Subject: [PATCH 11/54] feat(openab-agent/mcp): add runtime state-machine scaffold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces McpRuntimeManager owning one ServerHandle per configured server, each starting in ServerStatus::Disconnected per ADR §5.7 (lazy connect). Wires the manager via `mcp status` CLI so the types are exercised by clippy --features mcp; actual rmcp TokioChildProcess dial + Connected / Failed transitions land in the next slice to keep that risky bit isolated for bisecting. 
--- openab-agent/src/main.rs | 3 + openab-agent/src/mcp/mod.rs | 33 ++++++-- openab-agent/src/mcp/runtime.rs | 130 ++++++++++++++++++++++++++++++++ 3 files changed, 159 insertions(+), 7 deletions(-) create mode 100644 openab-agent/src/mcp/runtime.rs diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index dfa01ac77..066e92f37 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -43,6 +43,8 @@ enum McpAction { #[arg(long)] resolve: bool, }, + /// Show per-server runtime status + Status, } #[derive(Subcommand)] @@ -94,6 +96,7 @@ async fn main() { #[cfg(feature = "mcp")] Some(Commands::Mcp { action }) => match action { McpAction::List { resolve } => mcp::cli_list_servers(resolve), + McpAction::Status => mcp::cli_show_status(), }, } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index f8bae4385..7345e608e 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -1,8 +1,17 @@ //! Native MCP client. See `docs/adr/openab-agent-mcp.md`. pub mod config; +pub mod runtime; use config::{McpConfig, ServerConfig}; +use runtime::McpRuntimeManager; + +fn load_config_or_exit() -> McpConfig { + McpConfig::load().unwrap_or_else(|e| { + eprintln!("failed to load mcp config: {e:#}"); + std::process::exit(1); + }) +} /// `openab-agent mcp list [--resolve]`. /// @@ -11,13 +20,7 @@ use config::{McpConfig, ServerConfig}; /// substituting env vars and prints a leading warning — useful for debugging /// missing-env startup failures locally. pub fn cli_list_servers(resolve: bool) { - let cfg = match McpConfig::load() { - Ok(c) => c, - Err(e) => { - eprintln!("failed to load mcp config: {e:#}"); - std::process::exit(1); - } - }; + let cfg = load_config_or_exit(); if cfg.servers.is_empty() { println!("No MCP servers configured."); println!(" global: ~/.openab/agent/mcp.json"); @@ -55,3 +58,19 @@ fn print_json(status: &str, name: &str, value: &T) { } } } + +/// `openab-agent mcp status`. 
+/// +/// Prints per-server runtime status. Phase 1 always reports `Disconnected` +/// because servers are not yet dialed; the next slice wires `connect()` and +/// real state transitions land then. +pub fn cli_show_status() { + let manager = McpRuntimeManager::from_config(load_config_or_exit()); + if manager.is_empty() { + println!("No MCP servers configured."); + return; + } + for (name, status) in manager.statuses() { + println!("{} {name}", status.icon()); + } +} diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs new file mode 100644 index 000000000..1fd26446b --- /dev/null +++ b/openab-agent/src/mcp/runtime.rs @@ -0,0 +1,130 @@ +//! Per-server lifecycle manager. See ADR §5.4 + §5.7. +//! +//! This slice lands only the state-machine scaffold (statuses, handle map, +//! lazy-connect entry point). The actual rmcp `TokioChildProcess` dial + +//! client storage lands in the next slice — keeping that risky bit out of +//! the same commit so any breakage is easy to bisect. + +use std::collections::HashMap; + +use super::config::{McpConfig, ServerConfig}; + +/// Per-server status. ADR §5.7: lazy connect — handles start `Disconnected` +/// and transition to `Connecting` only on first use. Connecting / Connected / +/// Failed are wired up by `connect()` in the next slice. +#[allow(dead_code)] +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ServerStatus { + Disconnected, + Connecting, + Connected, + Failed(String), +} + +impl ServerStatus { + pub fn icon(&self) -> &'static str { + match self { + ServerStatus::Disconnected => "○", + ServerStatus::Connecting => "◐", + ServerStatus::Connected => "●", + ServerStatus::Failed(_) => "✗", + } + } +} + +#[allow(dead_code)] // name + config consumed by connect() in the next slice +#[derive(Debug)] +pub struct ServerHandle { + pub name: String, + pub config: ServerConfig, + pub status: ServerStatus, +} + +/// Owns one `ServerHandle` per configured server. 
Created once at process +/// start (or session start, per ADR §5.8 refresh model). +#[derive(Debug, Default)] +pub struct McpRuntimeManager { + handles: HashMap, +} + +impl McpRuntimeManager { + pub fn from_config(cfg: McpConfig) -> Self { + let handles = cfg + .servers + .into_iter() + .map(|(name, config)| { + let handle = ServerHandle { + name: name.clone(), + config, + status: ServerStatus::Disconnected, + }; + (name, handle) + }) + .collect(); + Self { handles } + } + + pub fn statuses(&self) -> Vec<(&str, &ServerStatus)> { + let mut out: Vec<_> = self + .handles + .iter() + .map(|(name, h)| (name.as_str(), &h.status)) + .collect(); + out.sort_by_key(|(name, _)| *name); + out + } + + pub fn len(&self) -> usize { + self.handles.len() + } + + pub fn is_empty(&self) -> bool { + self.handles.is_empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn from_config_initializes_each_server_disconnected() { + let json = r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, + "linear": { "type": "http", "url": "https://mcp.linear.app/mcp" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + assert_eq!(mgr.len(), 2); + let statuses = mgr.statuses(); + assert_eq!(statuses.len(), 2); + for (_, status) in statuses { + assert_eq!(*status, ServerStatus::Disconnected); + } + } + + #[test] + fn empty_config_yields_empty_manager() { + let mgr = McpRuntimeManager::from_config(McpConfig::default()); + assert!(mgr.is_empty()); + assert_eq!(mgr.len(), 0); + assert!(mgr.statuses().is_empty()); + } + + #[test] + fn statuses_sorted_by_name() { + let json = r#"{ + "mcpServers": { + "zed": { "type": "stdio", "command": "z" }, + "alpha": { "type": "stdio", "command": "a" }, + "mid": { "type": "stdio", "command": "m" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let names: Vec<&str> = 
mgr.statuses().into_iter().map(|(n, _)| n).collect(); + assert_eq!(names, vec!["alpha", "mid", "zed"]); + } +} From 40b849b4dcdc9aeeae9442feb12049e6d47fa2bf Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 18:50:53 +0000 Subject: [PATCH 12/54] fix(openab-agent/mcp): drop unused len() from McpRuntimeManager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `clippy --features mcp -- -D warnings` doesn't compile the test target, so `len()`'s only callers (the unit tests) didn't keep it alive. Tests already used `statuses().len()` in one place — switch the other two to match and drop the now-dead method. `is_empty()` stays because `cli_show_status` calls it. --- openab-agent/src/mcp/runtime.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 1fd26446b..bdb036088 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -74,10 +74,6 @@ impl McpRuntimeManager { out } - pub fn len(&self) -> usize { - self.handles.len() - } - pub fn is_empty(&self) -> bool { self.handles.is_empty() } @@ -97,7 +93,6 @@ mod tests { }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); - assert_eq!(mgr.len(), 2); let statuses = mgr.statuses(); assert_eq!(statuses.len(), 2); for (_, status) in statuses { @@ -109,7 +104,6 @@ mod tests { fn empty_config_yields_empty_manager() { let mgr = McpRuntimeManager::from_config(McpConfig::default()); assert!(mgr.is_empty()); - assert_eq!(mgr.len(), 0); assert!(mgr.statuses().is_empty()); } From 3ab8decfeb46db7f73c90c91d095dbf40dff9c37 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 19:02:54 +0000 Subject: [PATCH 13/54] feat(openab-agent/mcp): wrap handles in Arc, stub async connect() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ADR §5.7 lifecycle needs a `connect()` method that 
spawns a child process and awaits an rmcp handshake — both `Send` across `.await`. Plain `HashMap` is not `Sync`, and the background idle-eviction task will share the map with the foreground `mcp call` path, so the read-heavy / write-light access pattern wants `tokio::sync::RwLock`. This slice lands the lock migration only: - `handles: Arc<RwLock<HashMap<String, ServerHandle>>>` - `statuses()` and `is_empty()` become async; `cli_show_status` follows - `McpRuntimeManager` is now `Clone` (Arc bump) so the eviction task can hold its own handle - `connect(name)` transitions to `Connecting` and returns; the actual `rmcp::TokioChildProcess` dial + `Connected` / `Failed` transitions land in the next slice — keeping that bit isolated for bisecting 6 unit tests cover snapshot ordering, unknown-server error, transition, and clone-shares-state. --- openab-agent/src/main.rs | 2 +- openab-agent/src/mcp/mod.rs | 6 +- openab-agent/src/mcp/runtime.rs | 134 +++++++++++++++++++++++--------- 3 files changed, 103 insertions(+), 39 deletions(-) diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index 066e92f37..f96bdd0ed 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -96,7 +96,7 @@ async fn main() { #[cfg(feature = "mcp")] Some(Commands::Mcp { action }) => match action { McpAction::List { resolve } => mcp::cli_list_servers(resolve), - McpAction::Status => mcp::cli_show_status(), + McpAction::Status => mcp::cli_show_status().await, }, } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 7345e608e..1b6c5a27e 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -64,13 +64,13 @@ fn print_json(status: &str, name: &str, value: &T) { /// Prints per-server runtime status. Phase 1 always reports `Disconnected` /// because servers are not yet dialed; the next slice wires `connect()` and /// real state transitions land then.
-pub fn cli_show_status() { +pub async fn cli_show_status() { let manager = McpRuntimeManager::from_config(load_config_or_exit()); - if manager.is_empty() { + if manager.is_empty().await { println!("No MCP servers configured."); return; } - for (name, status) in manager.statuses() { + for (name, status) in manager.statuses().await { println!("{} {name}", status.icon()); } } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index bdb036088..1296ecc4a 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -1,18 +1,24 @@ //! Per-server lifecycle manager. See ADR §5.4 + §5.7. //! -//! This slice lands only the state-machine scaffold (statuses, handle map, -//! lazy-connect entry point). The actual rmcp `TokioChildProcess` dial + -//! client storage lands in the next slice — keeping that risky bit out of -//! the same commit so any breakage is easy to bisect. +//! Handles live behind `Arc>` so `connect()` (async, +//! spawns child processes) is `Send` across `.await` and a background idle- +//! eviction task can share the map with foreground `mcp call` invocations +//! (ADR §5.7). Read-heavy / write-light fits `RwLock`. +//! +//! This slice lands the lock migration and a `connect()` that transitions to +//! `Connecting`; the actual rmcp `TokioChildProcess` dial + transition to +//! `Connected` / `Failed` lands in the next slice — keeping that risky bit +//! isolated for bisecting. use std::collections::HashMap; +use std::sync::Arc; + +use anyhow::{anyhow, Result}; +use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; -/// Per-server status. ADR §5.7: lazy connect — handles start `Disconnected` -/// and transition to `Connecting` only on first use. Connecting / Connected / -/// Failed are wired up by `connect()` in the next slice. 
-#[allow(dead_code)] +#[allow(dead_code)] // Connected / Failed land with the rmcp dial in the next slice #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { Disconnected, @@ -32,7 +38,7 @@ impl ServerStatus { } } -#[allow(dead_code)] // name + config consumed by connect() in the next slice +#[allow(dead_code)] // name + config consumed by the rmcp dial in the next slice #[derive(Debug)] pub struct ServerHandle { pub name: String, @@ -40,16 +46,16 @@ pub struct ServerHandle { pub status: ServerStatus, } -/// Owns one `ServerHandle` per configured server. Created once at process -/// start (or session start, per ADR §5.8 refresh model). -#[derive(Debug, Default)] +/// Owns one `ServerHandle` per configured server, behind an async `RwLock` +/// so the foreground LLM path and the background eviction task can share it. +#[derive(Debug, Default, Clone)] pub struct McpRuntimeManager { - handles: HashMap, + handles: Arc>>, } impl McpRuntimeManager { pub fn from_config(cfg: McpConfig) -> Self { - let handles = cfg + let handles: HashMap<_, _> = cfg .servers .into_iter() .map(|(name, config)| { @@ -61,21 +67,39 @@ impl McpRuntimeManager { (name, handle) }) .collect(); - Self { handles } + Self { + handles: Arc::new(RwLock::new(handles)), + } } - pub fn statuses(&self) -> Vec<(&str, &ServerStatus)> { - let mut out: Vec<_> = self - .handles - .iter() - .map(|(name, h)| (name.as_str(), &h.status)) - .collect(); - out.sort_by_key(|(name, _)| *name); + /// Snapshot of `(name, status)` sorted by name. Clones out so the read + /// guard is dropped before returning — callers don't hold a lock. 
+ pub async fn statuses(&self) -> Vec<(String, ServerStatus)> { + let mut out: Vec<_> = { + let guard = self.handles.read().await; + guard + .iter() + .map(|(name, h)| (name.clone(), h.status.clone())) + .collect() + }; + out.sort_by(|(a, _), (b, _)| a.cmp(b)); out } - pub fn is_empty(&self) -> bool { - self.handles.is_empty() + pub async fn is_empty(&self) -> bool { + self.handles.read().await.is_empty() + } + + /// Transition the named server to `Connecting`. The rmcp + /// `TokioChildProcess` dial + transition to `Connected` / `Failed` + /// lands in the next slice — see module doc. + pub async fn connect(&self, name: &str) -> Result<()> { + let mut guard = self.handles.write().await; + let handle = guard + .get_mut(name) + .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; + handle.status = ServerStatus::Connecting; + Ok(()) } } @@ -83,8 +107,8 @@ impl McpRuntimeManager { mod tests { use super::*; - #[test] - fn from_config_initializes_each_server_disconnected() { + #[tokio::test] + async fn from_config_initializes_each_server_disconnected() { let json = r#"{ "mcpServers": { "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, @@ -93,22 +117,22 @@ mod tests { }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); - let statuses = mgr.statuses(); + let statuses = mgr.statuses().await; assert_eq!(statuses.len(), 2); for (_, status) in statuses { - assert_eq!(*status, ServerStatus::Disconnected); + assert_eq!(status, ServerStatus::Disconnected); } } - #[test] - fn empty_config_yields_empty_manager() { + #[tokio::test] + async fn empty_config_yields_empty_manager() { let mgr = McpRuntimeManager::from_config(McpConfig::default()); - assert!(mgr.is_empty()); - assert!(mgr.statuses().is_empty()); + assert!(mgr.is_empty().await); + assert!(mgr.statuses().await.is_empty()); } - #[test] - fn statuses_sorted_by_name() { + #[tokio::test] + async fn statuses_sorted_by_name() { let json = r#"{ 
"mcpServers": { "zed": { "type": "stdio", "command": "z" }, @@ -118,7 +142,47 @@ mod tests { }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); - let names: Vec<&str> = mgr.statuses().into_iter().map(|(n, _)| n).collect(); + let names: Vec = mgr + .statuses() + .await + .into_iter() + .map(|(n, _)| n) + .collect(); assert_eq!(names, vec!["alpha", "mid", "zed"]); } + + #[tokio::test] + async fn connect_unknown_server_errors() { + let mgr = McpRuntimeManager::from_config(McpConfig::default()); + let err = mgr.connect("missing").await.unwrap_err().to_string(); + assert!(err.contains("missing"), "expected 'missing' in {err}"); + } + + #[tokio::test] + async fn connect_transitions_to_connecting() { + let json = r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "true" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + mgr.connect("fs").await.unwrap(); + let statuses = mgr.statuses().await; + assert_eq!(statuses[0].1, ServerStatus::Connecting); + } + + #[tokio::test] + async fn manager_clone_shares_state() { + let json = r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "true" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let a = McpRuntimeManager::from_config(cfg); + let b = a.clone(); + a.connect("fs").await.unwrap(); + assert_eq!(b.statuses().await[0].1, ServerStatus::Connecting); + } } From dc1b0a6d6d30cffe6df62a01af98f67cff3faeba Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 19:10:40 +0000 Subject: [PATCH 14/54] fix(openab-agent/mcp): satisfy cargo fmt --check on runtime test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chained `.statuses().await.into_iter().map(|(n, _)| n).collect()` fits on one line under rustfmt's default 100-char width — broke it preemptively last tick on the wrong side of the threshold. 
--- openab-agent/src/mcp/runtime.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 1296ecc4a..16c759e72 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -142,12 +142,7 @@ mod tests { }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); - let names: Vec = mgr - .statuses() - .await - .into_iter() - .map(|(n, _)| n) - .collect(); + let names: Vec = mgr.statuses().await.into_iter().map(|(n, _)| n).collect(); assert_eq!(names, vec!["alpha", "mid", "zed"]); } From be9ee62ed43ced14f709b8405e5e761f2cef100b Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 19:20:58 +0000 Subject: [PATCH 15/54] fix(openab-agent/mcp): allow(dead_code) on connect() stub `clippy --features mcp -D warnings` doesn't compile the test target, so `connect()`'s only callers (the unit tests) don't keep it alive. The real rmcp dial lands in the next slice and will be reachable from the meta-tool dispatch / `mcp call` CLI path; until then a targeted allow keeps clippy quiet without papering over the type-level dead_code on ServerStatus variants (which is already explicitly allowed). --- openab-agent/src/mcp/runtime.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 16c759e72..f54bff0ed 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -93,6 +93,7 @@ impl McpRuntimeManager { /// Transition the named server to `Connecting`. The rmcp /// `TokioChildProcess` dial + transition to `Connected` / `Failed` /// lands in the next slice — see module doc. 
+ #[allow(dead_code)] // wired into meta-tool dispatch in the next slice; tests keep it covered pub async fn connect(&self, name: &str) -> Result<()> { let mut guard = self.handles.write().await; let handle = guard From 82d5da4bd45b6051f93a6c23ea3fac320a832ec5 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 19:40:22 +0000 Subject: [PATCH 16/54] feat(openab-agent/mcp): real stdio dial via rmcp + double-lock connect connect() now spawns the configured stdio child via rmcp's TokioChildProcess, runs the JSON-RPC handshake (().serve(transport)), and parks the resulting RunningService on ServerHandle.client. Lock discipline uses a double-acquire: a brief write lock to mark Connecting, drop, run the dial without any lock held, then a second brief write lock to install the client or record Failed(msg). Holding the write lock across the spawn+handshake would block mcp status and the future idle-eviction scan for the entire dial latency. HTTP transport is rejected with a "phase 2" error so misconfigured entries surface cleanly without advancing state past Disconnected. Adds `openab-agent mcp connect <name>` as a smoke-test CLI for mcp.json entries; the RunningService is dropped on process exit. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/main.rs | 6 ++ openab-agent/src/mcp/mod.rs | 21 ++++- openab-agent/src/mcp/runtime.rs | 134 +++++++++++++++++++++++++------- 3 files changed, 132 insertions(+), 29 deletions(-) diff --git a/openab-agent/src/main.rs b/openab-agent/src/main.rs index f96bdd0ed..7acf6769b 100644 --- a/openab-agent/src/main.rs +++ b/openab-agent/src/main.rs @@ -45,6 +45,11 @@ enum McpAction { }, /// Show per-server runtime status Status, + /// Spawn the configured server and run the MCP handshake (smoke-test).
+ Connect { + /// Server name as configured in mcp.json + name: String, + }, } #[derive(Subcommand)] @@ -97,6 +102,7 @@ async fn main() { Some(Commands::Mcp { action }) => match action { McpAction::List { resolve } => mcp::cli_list_servers(resolve), McpAction::Status => mcp::cli_show_status().await, + McpAction::Connect { name } => mcp::cli_connect(name).await, }, } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 1b6c5a27e..c6b6bd110 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -61,9 +61,9 @@ fn print_json(status: &str, name: &str, value: &T) { /// `openab-agent mcp status`. /// -/// Prints per-server runtime status. Phase 1 always reports `Disconnected` -/// because servers are not yet dialed; the next slice wires `connect()` and -/// real state transitions land then. +/// Prints per-server runtime status. Servers start `Disconnected` and only +/// advance after `mcp connect <name>` (or, later, lazy dial from the agent +/// path). pub async fn cli_show_status() { let manager = McpRuntimeManager::from_config(load_config_or_exit()); if manager.is_empty().await { @@ -74,3 +74,18 @@ pub async fn cli_show_status() { println!("{} {name}", status.icon()); } } + +/// `openab-agent mcp connect <name>`. Spawns the configured stdio server, +/// runs the rmcp handshake, and reports success or the failure reason. +/// The connection is dropped on process exit — this CLI is a smoke-test +/// for `mcp.json` entries, not a long-running session. +pub async fn cli_connect(name: String) { + let manager = McpRuntimeManager::from_config(load_config_or_exit()); + match manager.connect(&name).await { + Ok(()) => println!("● connected: {name}"), + Err(e) => { + eprintln!("✗ {name}: {e:#}"); + std::process::exit(1); + } + } +} diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index f54bff0ed..a99b3d6ca 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -5,20 +5,26 @@ //!
eviction task can share the map with foreground `mcp call` invocations //! (ADR §5.7). Read-heavy / write-light fits `RwLock`. //! -//! This slice lands the lock migration and a `connect()` that transitions to -//! `Connecting`; the actual rmcp `TokioChildProcess` dial + transition to -//! `Connected` / `Failed` lands in the next slice — keeping that risky bit -//! isolated for bisecting. +//! `connect()` uses a double-lock pattern: a short write lock to mark +//! `Connecting`, release the lock, run the rmcp handshake without holding +//! any lock, then re-acquire briefly to install the client or record the +//! failure. Holding the write lock across the `serve(...).await` would +//! starve every reader (including `mcp status` and the eviction scan) for +//! the duration of a child-process spawn + handshake. use std::collections::HashMap; use std::sync::Arc; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; +use rmcp::service::{RoleClient, RunningService}; +use rmcp::ServiceExt; +use rmcp::transport::{ConfigureCommandExt, TokioChildProcess}; +use tokio::process::Command; use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; -#[allow(dead_code)] // Connected / Failed land with the rmcp dial in the next slice +#[allow(dead_code)] // NeedsAuth lands with the Phase 2 OAuth slice (ADR §5.7) #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { Disconnected, @@ -38,12 +44,22 @@ impl ServerStatus { } } -#[allow(dead_code)] // name + config consumed by the rmcp dial in the next slice -#[derive(Debug)] pub struct ServerHandle { pub name: String, pub config: ServerConfig, pub status: ServerStatus, + pub client: Option>, +} + +impl std::fmt::Debug for ServerHandle { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ServerHandle") + .field("name", &self.name) + .field("config", &self.config) + .field("status", &self.status) + .field("client", &self.client.is_some()) + .finish() + } } /// Owns 
one `ServerHandle` per configured server, behind an async `RwLock` @@ -63,6 +79,7 @@ impl McpRuntimeManager { name: name.clone(), config, status: ServerStatus::Disconnected, + client: None, }; (name, handle) }) @@ -90,17 +107,75 @@ impl McpRuntimeManager { self.handles.read().await.is_empty() } - /// Transition the named server to `Connecting`. The rmcp - /// `TokioChildProcess` dial + transition to `Connected` / `Failed` - /// lands in the next slice — see module doc. - #[allow(dead_code)] // wired into meta-tool dispatch in the next slice; tests keep it covered + /// Lazy-connect the named server (ADR §5.7). Idempotent if already + /// `Connected` with a live client. HTTP transport is Phase 2. pub async fn connect(&self, name: &str) -> Result<()> { + let dial = { + let mut guard = self.handles.write().await; + let handle = guard + .get_mut(name) + .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; + if matches!(handle.status, ServerStatus::Connected) && handle.client.is_some() { + return Ok(()); + } + let resolved = handle.config.resolved(name)?; + let dial = match resolved { + ServerConfig::Stdio { + command, args, env, .. + } => StdioDial { command, args, env }, + ServerConfig::Http { .. 
} => { + return Err(anyhow!( + "http transport lands in phase 2 (server {name:?})" + )); + } + }; + handle.status = ServerStatus::Connecting; + dial + }; + + let dial_result = dial.run().await; + let mut guard = self.handles.write().await; let handle = guard .get_mut(name) - .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; - handle.status = ServerStatus::Connecting; - Ok(()) + .ok_or_else(|| anyhow!("server {name:?} vanished during connect"))?; + match dial_result { + Ok(client) => { + handle.status = ServerStatus::Connected; + handle.client = Some(client); + Ok(()) + } + Err(e) => { + let msg = format!("{e:#}"); + handle.status = ServerStatus::Failed(msg.clone()); + Err(anyhow!(msg)) + } + } + } +} + +struct StdioDial { + command: String, + args: Vec, + env: HashMap, +} + +impl StdioDial { + async fn run(self) -> Result> { + let Self { + command, + args, + env, + } = self; + let cmd = Command::new(&command).configure(|c| { + c.args(&args); + c.envs(&env); + }); + let transport = TokioChildProcess::new(cmd) + .with_context(|| format!("spawn mcp child process {command:?}"))?; + ().serve(transport) + .await + .with_context(|| format!("mcp handshake with {command:?}")) } } @@ -155,30 +230,37 @@ mod tests { } #[tokio::test] - async fn connect_transitions_to_connecting() { + async fn connect_http_returns_phase2_error() { let json = r#"{ "mcpServers": { - "fs": { "type": "stdio", "command": "true" } + "linear": { "type": "http", "url": "https://mcp.linear.app/mcp" } } }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); - mgr.connect("fs").await.unwrap(); - let statuses = mgr.statuses().await; - assert_eq!(statuses[0].1, ServerStatus::Connecting); + let err = mgr.connect("linear").await.unwrap_err().to_string(); + assert!(err.contains("phase 2"), "expected 'phase 2' in {err}"); + // Status not advanced past Disconnected for unsupported transports. 
+ assert_eq!(mgr.statuses().await[0].1, ServerStatus::Disconnected); } #[tokio::test] - async fn manager_clone_shares_state() { + async fn connect_to_missing_binary_records_failed() { let json = r#"{ "mcpServers": { - "fs": { "type": "stdio", "command": "true" } + "broken": { + "type": "stdio", + "command": "/nonexistent/path/openab-mcp-test-stub-zzz" + } } }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let a = McpRuntimeManager::from_config(cfg); - let b = a.clone(); - a.connect("fs").await.unwrap(); - assert_eq!(b.statuses().await[0].1, ServerStatus::Connecting); + let mgr = McpRuntimeManager::from_config(cfg); + let err = mgr.connect("broken").await.unwrap_err().to_string(); + assert!(err.contains("spawn"), "expected 'spawn' in {err}"); + match &mgr.statuses().await[0].1 { + ServerStatus::Failed(msg) => assert!(msg.contains("spawn")), + other => panic!("expected Failed, got {other:?}"), + } } } From 8fcd17c3b485e3250c796fe787eddb0a8dc249a5 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 19:43:04 +0000 Subject: [PATCH 17/54] fix(openab-agent/mcp): race guard for concurrent connect() on same server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Mira's Tick 13 review: when two callers race on the same name, both pass the first-lock idempotency check (Disconnected), both spawn a child, both come back to acquire the install-lock. The second arrival must yield to the winner, otherwise it overwrites the installed client and silently drops the working RunningService. Adds a 3-line double-check at the second lock acquisition: if status is already Connected with a live client, return Ok(()) — `dial_result` falls out of scope here and RunningService::Drop kills the duplicate child. Cheaper than a Connecting-aware notify/condvar and adequate until the eviction-aware slice lands. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/runtime.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index a99b3d6ca..69c1cdc0a 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -139,6 +139,12 @@ impl McpRuntimeManager { let handle = guard .get_mut(name) .ok_or_else(|| anyhow!("server {name:?} vanished during connect"))?; + // Race guard: a concurrent connect() may have installed a client while + // we were dialing. Yield to the winner — `dial_result` drops here, + // killing the duplicate child via RunningService's Drop impl. + if matches!(handle.status, ServerStatus::Connected) && handle.client.is_some() { + return Ok(()); + } match dial_result { Ok(client) => { handle.status = ServerStatus::Connected; From 3b214ddd7c4d63221ec2d46aef16a7fd837e003e Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 19:50:59 +0000 Subject: [PATCH 18/54] fix(openab-agent/mcp): satisfy cargo fmt --check on runtime.rs Three rustfmt collapses that I missed under default 100-char width: * import order: bare `rmcp::ServiceExt` sorts AFTER `rmcp::transport::*` (rustfmt puts sub-module paths before bare items within a crate). * `return Err(anyhow!(...))` for the HTTP phase-2 message fits on one line (~88 chars). * `let Self { command, args, env } = self;` fits on one line. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/runtime.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 69c1cdc0a..9fd04adeb 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -17,8 +17,8 @@ use std::sync::Arc; use anyhow::{anyhow, Context, Result}; use rmcp::service::{RoleClient, RunningService}; -use rmcp::ServiceExt; use rmcp::transport::{ConfigureCommandExt, TokioChildProcess}; +use rmcp::ServiceExt; use tokio::process::Command; use tokio::sync::RwLock; @@ -124,9 +124,7 @@ impl McpRuntimeManager { command, args, env, .. } => StdioDial { command, args, env }, ServerConfig::Http { .. } => { - return Err(anyhow!( - "http transport lands in phase 2 (server {name:?})" - )); + return Err(anyhow!("http transport lands in phase 2 (server {name:?})")); } }; handle.status = ServerStatus::Connecting; @@ -168,11 +166,7 @@ struct StdioDial { impl StdioDial { async fn run(self) -> Result> { - let Self { - command, - args, - env, - } = self; + let Self { command, args, env } = self; let cmd = Command::new(&command).configure(|c| { c.args(&args); c.envs(&env); From cf6d7cd33ab84b69f2294c79c1e3d8644fe51d37 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 20:03:09 +0000 Subject: [PATCH 19/54] feat(openab-agent/mcp): meta-tool action enum + help/list_servers dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the single `mcp` tool the LLM sees (ADR §5.2). This slice lands: * `Action` enum tagged on `action`, matching the eight ADR actions, deserialized straight from the LLM's tool-call payload. * `dispatch(manager, action) -> Result` as the single entry point — both `agent.rs::execute_tool` (next slice) and tests go through this. * `help` returns the static usage doc. 
* `list_servers` returns `[{name, status, transport}]` via a new `McpRuntimeManager::snapshot()` that clones out under a read lock. * The four IO-bearing actions (`list_tools`, `describe_tool`, `call`, `status`) return a `not yet implemented` error so the surface area is visible to callers without pre-implementing the peer-borrow path. `login` / `complete_login` land with the Phase 2 OAuth slice. `#[allow(dead_code)]` on `Action` and `dispatch` because clippy --features mcp doesn't compile the test target and agent.rs wire-up lands next slice. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 208 ++++++++++++++++++++++++++++++ openab-agent/src/mcp/mod.rs | 1 + openab-agent/src/mcp/runtime.rs | 15 +++ 3 files changed, 224 insertions(+) create mode 100644 openab-agent/src/mcp/meta_tool.rs diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs new file mode 100644 index 000000000..8222c1f96 --- /dev/null +++ b/openab-agent/src/mcp/meta_tool.rs @@ -0,0 +1,208 @@ +//! Single `mcp` meta-tool the LLM sees. See ADR §5.2 + §5.3. +//! +//! Phase 1 scope: action enum + dispatch wiring + the two no-IO actions +//! (`help`, `list_servers`). The IO-bearing actions (`list_tools`, +//! `describe_tool`, `call`, `status`) return a `not yet implemented` +//! error so the contract surface is visible to callers while the +//! `RunningService` borrow path lands in the next slice. The Phase 2 +//! `login` / `complete_login` actions land with the OAuth slice. + +use anyhow::{anyhow, Result}; +use serde::Deserialize; +use serde_json::{json, Value}; + +use super::config::ServerConfig; +use super::runtime::{McpRuntimeManager, ServerStatus}; + +/// Deserialized form of the meta-tool's input JSON (ADR §5.2). The LLM +/// sends `{ "action": "...", ... }`; `tag = "action"` routes by that field. 
+#[allow(dead_code)] // wired into agent.rs execute_tool dispatch in the next slice +#[derive(Debug, Deserialize)] +#[serde(tag = "action", rename_all = "snake_case")] +pub enum Action { + Help, + ListServers, + ListTools { + server: String, + }, + DescribeTool { + server: String, + tool: String, + }, + Call { + server: String, + tool: String, + #[serde(default)] + arguments: Value, + }, + Status { + #[serde(default)] + server: Option, + }, +} + +/// Entry point — the LLM tool dispatcher hands us a deserialized `Action` +/// and we return the JSON payload that becomes the tool result. +#[allow(dead_code)] // wired into agent.rs execute_tool dispatch in the next slice +pub async fn dispatch(manager: &McpRuntimeManager, action: Action) -> Result { + match action { + Action::Help => Ok(json!(HELP)), + Action::ListServers => Ok(list_servers(manager).await), + Action::ListTools { .. } + | Action::DescribeTool { .. } + | Action::Call { .. } + | Action::Status { .. } => Err(anyhow!( + "mcp action not yet implemented in phase 1 (lands with the peer-borrow slice)" + )), + } +} + +const HELP: &str = "\ +The `mcp` tool lets you talk to configured MCP servers. + +Actions: + help show this message + list_servers list configured servers and status + list_tools(server) list tools exposed by a server + describe_tool(server, tool) show input_schema for one tool + call(server, tool, args) invoke a tool + status(server?) per-server health + last error + +Connections are lazy: the first action that needs a server spawns its \ +child process and runs the handshake. 
Idle servers are evicted after \ +the configured TTL."; + +async fn list_servers(manager: &McpRuntimeManager) -> Value { + let snapshot = manager.snapshot().await; + let entries: Vec = snapshot + .into_iter() + .map(|(name, status, config)| { + json!({ + "name": name, + "status": status_label(&status), + "transport": transport_label(&config), + }) + }) + .collect(); + Value::Array(entries) +} + +fn status_label(status: &ServerStatus) -> &'static str { + match status { + ServerStatus::Disconnected => "disconnected", + ServerStatus::Connecting => "connecting", + ServerStatus::Connected => "connected", + ServerStatus::Failed(_) => "failed", + } +} + +fn transport_label(config: &ServerConfig) -> &'static str { + match config { + ServerConfig::Stdio { .. } => "stdio", + ServerConfig::Http { .. } => "http", + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::mcp::config::McpConfig; + + fn mgr_from(json: &str) -> McpRuntimeManager { + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + McpRuntimeManager::from_config(cfg) + } + + #[tokio::test] + async fn help_returns_doc_string() { + let mgr = mgr_from(r#"{"mcpServers":{}}"#); + let result = dispatch(&mgr, Action::Help).await.unwrap(); + let s = result.as_str().unwrap(); + assert!(s.contains("list_servers")); + assert!(s.contains("call(server, tool")); + } + + #[tokio::test] + async fn list_servers_reports_name_status_transport() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, + "linear": { "type": "http", "url": "https://mcp.linear.app/mcp" } + } + }"#, + ); + let result = dispatch(&mgr, Action::ListServers).await.unwrap(); + let entries = result.as_array().unwrap(); + assert_eq!(entries.len(), 2); + let by_name: std::collections::HashMap<_, _> = entries + .iter() + .map(|e| (e["name"].as_str().unwrap(), e)) + .collect(); + assert_eq!(by_name["fs"]["transport"], "stdio"); + assert_eq!(by_name["fs"]["status"], "disconnected"); + 
assert_eq!(by_name["linear"]["transport"], "http"); + } + + #[tokio::test] + async fn list_servers_empty_yields_empty_array() { + let mgr = mgr_from(r#"{"mcpServers":{}}"#); + let result = dispatch(&mgr, Action::ListServers).await.unwrap(); + assert!(result.as_array().unwrap().is_empty()); + } + + #[tokio::test] + async fn unimplemented_actions_error_cleanly() { + let mgr = mgr_from(r#"{"mcpServers":{}}"#); + for action in [ + Action::ListTools { + server: "fs".into(), + }, + Action::DescribeTool { + server: "fs".into(), + tool: "read".into(), + }, + Action::Call { + server: "fs".into(), + tool: "read".into(), + arguments: json!({}), + }, + Action::Status { server: None }, + ] { + let err = dispatch(&mgr, action).await.unwrap_err().to_string(); + assert!(err.contains("not yet implemented"), "got: {err}"); + } + } + + #[test] + fn action_deserializes_from_meta_tool_payload() { + let payload = json!({ + "action": "call", + "server": "github", + "tool": "create_issue", + "arguments": { "title": "x" } + }); + let action: Action = serde_json::from_value(payload).unwrap(); + match action { + Action::Call { + server, + tool, + arguments, + } => { + assert_eq!(server, "github"); + assert_eq!(tool, "create_issue"); + assert_eq!(arguments["title"], "x"); + } + other => panic!("expected Call, got {other:?}"), + } + } + + #[test] + fn action_status_server_is_optional() { + let action: Action = serde_json::from_value(json!({ "action": "status" })).unwrap(); + assert!(matches!(action, Action::Status { server: None })); + let action: Action = + serde_json::from_value(json!({ "action": "status", "server": "fs" })).unwrap(); + assert!(matches!(action, Action::Status { server: Some(_) })); + } +} diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index c6b6bd110..b7b7b8f78 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -1,6 +1,7 @@ //! Native MCP client. See `docs/adr/openab-agent-mcp.md`. 
pub mod config; +pub mod meta_tool; pub mod runtime; use config::{McpConfig, ServerConfig}; diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 9fd04adeb..e48207017 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -107,6 +107,21 @@ impl McpRuntimeManager { self.handles.read().await.is_empty() } + /// Snapshot of `(name, status, config)` sorted by name. Used by the + /// `list_servers` meta-tool action which needs the transport variant + /// alongside the runtime status. + pub async fn snapshot(&self) -> Vec<(String, ServerStatus, ServerConfig)> { + let mut out: Vec<_> = { + let guard = self.handles.read().await; + guard + .iter() + .map(|(name, h)| (name.clone(), h.status.clone(), h.config.clone())) + .collect() + }; + out.sort_by(|(a, ..), (b, ..)| a.cmp(b)); + out + } + /// Lazy-connect the named server (ADR §5.7). Idempotent if already /// `Connected` with a live client. HTTP transport is Phase 2. pub async fn connect(&self, name: &str) -> Result<()> { From 27f5ccaf07c40f79e9d7d533c1b75ad4c9a096ae Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 20:12:24 +0000 Subject: [PATCH 20/54] =?UTF-8?q?refactor(openab-agent/mcp):=20apply=20Mir?= =?UTF-8?q?a=20Tick=2015=20review=20=E2=80=94=20zero-clone=20+=20LLM=20fal?= =?UTF-8?q?lback=20hint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two of Mira's three Tick 15 findings (Arc-wrap for peer borrow lands with the actual list_tools/call slice — Arc without a consumer earns no keep). (1) `not_implemented_msg(action)` names the requested action, lists the actions that DO work (`help`, `list_servers`), and tells the LLM to fall back to native `read` / `write` / `edit` / `bash` rather than retrying. The old generic "not yet implemented" gave the model nothing to act on. (2) `ServerConfig::transport_label() -> &'static str` replaces the config clone inside `snapshot()`. 
The `Stdio` variant carries an `args: Vec` and `env: HashMap`; cloning that just to read the variant tag was wasted heap traffic on every `list_servers` call. `snapshot()` now yields `(String, ServerStatus, &'static str)`. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/config.rs | 13 +++++ openab-agent/src/mcp/meta_tool.rs | 83 +++++++++++++++++++------------ openab-agent/src/mcp/runtime.rs | 10 ++-- 3 files changed, 69 insertions(+), 37 deletions(-) diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 892699afa..742459430 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -36,6 +36,19 @@ pub enum ServerConfig { }, } +impl ServerConfig { + /// Static label used by the `mcp` meta-tool's `list_servers` action. + /// Returning `&'static str` lets `snapshot()` avoid cloning the + /// (potentially large) `Stdio { args, env, ... }` payload just to + /// read the transport variant. + pub fn transport_label(&self) -> &'static str { + match self { + ServerConfig::Stdio { .. } => "stdio", + ServerConfig::Http { .. } => "http", + } + } +} + #[derive(Debug, Default, Clone, Serialize, Deserialize)] pub struct ToolFilter { #[serde(default)] diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 8222c1f96..ee49e454c 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -11,7 +11,6 @@ use anyhow::{anyhow, Result}; use serde::Deserialize; use serde_json::{json, Value}; -use super::config::ServerConfig; use super::runtime::{McpRuntimeManager, ServerStatus}; /// Deserialized form of the meta-tool's input JSON (ADR §5.2). The LLM @@ -48,15 +47,30 @@ pub async fn dispatch(manager: &McpRuntimeManager, action: Action) -> Result Ok(json!(HELP)), Action::ListServers => Ok(list_servers(manager).await), - Action::ListTools { .. } - | Action::DescribeTool { .. } - | Action::Call { .. } - | Action::Status { .. 
} => Err(anyhow!( - "mcp action not yet implemented in phase 1 (lands with the peer-borrow slice)" - )), + other => Err(anyhow!("{}", not_implemented_msg(&other))), } } +/// Error body for actions whose handler hasn't landed yet. Mentions the +/// requested action and the supported set so the LLM can recover by +/// falling back to the native `read` / `write` / `edit` / `bash` tools +/// instead of retrying the same action blindly. +fn not_implemented_msg(action: &Action) -> String { + let name = match action { + Action::Help => "help", + Action::ListServers => "list_servers", + Action::ListTools { .. } => "list_tools", + Action::DescribeTool { .. } => "describe_tool", + Action::Call { .. } => "call", + Action::Status { .. } => "status", + }; + format!( + "mcp action '{name}' is not yet implemented (phase 1 scaffold). \ + Currently supported: 'help', 'list_servers'. To complete your task \ + right now, fall back to the native agent tools (read, write, edit, bash)." + ) +} + const HELP: &str = "\ The `mcp` tool lets you talk to configured MCP servers. @@ -76,11 +90,11 @@ async fn list_servers(manager: &McpRuntimeManager) -> Value { let snapshot = manager.snapshot().await; let entries: Vec = snapshot .into_iter() - .map(|(name, status, config)| { + .map(|(name, status, transport)| { json!({ "name": name, "status": status_label(&status), - "transport": transport_label(&config), + "transport": transport, }) }) .collect(); @@ -96,13 +110,6 @@ fn status_label(status: &ServerStatus) -> &'static str { } } -fn transport_label(config: &ServerConfig) -> &'static str { - match config { - ServerConfig::Stdio { .. } => "stdio", - ServerConfig::Http { .. 
} => "http", - } -} - #[cfg(test)] mod tests { use super::*; @@ -152,25 +159,37 @@ mod tests { } #[tokio::test] - async fn unimplemented_actions_error_cleanly() { + async fn unimplemented_actions_name_themselves_and_guide_fallback() { let mgr = mgr_from(r#"{"mcpServers":{}}"#); - for action in [ - Action::ListTools { - server: "fs".into(), - }, - Action::DescribeTool { - server: "fs".into(), - tool: "read".into(), - }, - Action::Call { - server: "fs".into(), - tool: "read".into(), - arguments: json!({}), - }, - Action::Status { server: None }, - ] { + let cases = [ + ( + Action::ListTools { + server: "fs".into(), + }, + "list_tools", + ), + ( + Action::DescribeTool { + server: "fs".into(), + tool: "read".into(), + }, + "describe_tool", + ), + ( + Action::Call { + server: "fs".into(), + tool: "read".into(), + arguments: json!({}), + }, + "call", + ), + (Action::Status { server: None }, "status"), + ]; + for (action, expected_name) in cases { let err = dispatch(&mgr, action).await.unwrap_err().to_string(); + assert!(err.contains(expected_name), "missing action name: {err}"); assert!(err.contains("not yet implemented"), "got: {err}"); + assert!(err.contains("read, write, edit, bash"), "missing fallback: {err}"); } } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index e48207017..a4828e899 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -107,15 +107,15 @@ impl McpRuntimeManager { self.handles.read().await.is_empty() } - /// Snapshot of `(name, status, config)` sorted by name. Used by the - /// `list_servers` meta-tool action which needs the transport variant - /// alongside the runtime status. - pub async fn snapshot(&self) -> Vec<(String, ServerStatus, ServerConfig)> { + /// Snapshot of `(name, status, transport_label)` sorted by name. Used + /// by the `list_servers` meta-tool action; the static transport label + /// avoids cloning the `Stdio { args, env, .. }` payload. 
+ pub async fn snapshot(&self) -> Vec<(String, ServerStatus, &'static str)> { let mut out: Vec<_> = { let guard = self.handles.read().await; guard .iter() - .map(|(name, h)| (name.clone(), h.status.clone(), h.config.clone())) + .map(|(name, h)| (name.clone(), h.status.clone(), h.config.transport_label())) .collect() }; out.sort_by(|(a, ..), (b, ..)| a.cmp(b)); From 8b91c1c089659946d68c4b4af820fd18e4332b61 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 20:20:38 +0000 Subject: [PATCH 21/54] fix(openab-agent/mcp): split wide assert!() to satisfy cargo fmt --check rustfmt's fn_call_width default (60) splits assert!() args when the inline args exceed it; the new "read, write, edit, bash" fallback assert tipped the third call from ~55 to ~62 chars. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index ee49e454c..548b99691 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -189,7 +189,10 @@ mod tests { let err = dispatch(&mgr, action).await.unwrap_err().to_string(); assert!(err.contains(expected_name), "missing action name: {err}"); assert!(err.contains("not yet implemented"), "got: {err}"); - assert!(err.contains("read, write, edit, bash"), "missing fallback: {err}"); + assert!( + err.contains("read, write, edit, bash"), + "missing fallback: {err}" + ); } } From 13d5ea7b03ae51bf8858e328111b1dbd592ae527 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 20:34:02 +0000 Subject: [PATCH 22/54] feat(openab-agent/mcp): list_tools action + Arc-cloned peer borrow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply Mira's Tick 15 review (3) and ship the first IO-bearing meta-tool action. * `ServerHandle.client` is now `Option>>`. `connect()` wraps the dial result in `Arc::new`. 
* `McpRuntimeManager::arc_peer(name)` clones the Arc out under a short read lock, drops the guard, returns the handle. Callers `.await` on `peer.list_all_tools()` / `peer.call_tool()` with no runtime lock held — kills the writer-starvation risk Mira flagged and sidesteps `Future is not Send` from holding a guard across `.await`. * `Action::ListTools { server }` now wired: lazy `connect()` → `arc_peer()` → `peer.list_all_tools()` → `[{name, description}]`. `describe_tool` / `call` / `status` still stub. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 62 ++++++++++++++++++++++++++----- openab-agent/src/mcp/runtime.rs | 28 +++++++++++++- 2 files changed, 79 insertions(+), 11 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 548b99691..413cd73bf 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -7,7 +7,7 @@ //! `RunningService` borrow path lands in the next slice. The Phase 2 //! `login` / `complete_login` actions land with the OAuth slice. -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; use serde::Deserialize; use serde_json::{json, Value}; @@ -47,6 +47,7 @@ pub async fn dispatch(manager: &McpRuntimeManager, action: Action) -> Result Ok(json!(HELP)), Action::ListServers => Ok(list_servers(manager).await), + Action::ListTools { server } => list_tools(manager, &server).await, other => Err(anyhow!("{}", not_implemented_msg(&other))), } } @@ -66,8 +67,9 @@ fn not_implemented_msg(action: &Action) -> String { }; format!( "mcp action '{name}' is not yet implemented (phase 1 scaffold). \ - Currently supported: 'help', 'list_servers'. To complete your task \ - right now, fall back to the native agent tools (read, write, edit, bash)." + Currently supported: 'help', 'list_servers', 'list_tools'. To complete \ + your task right now, fall back to the native agent tools (read, write, \ + edit, bash)." 
) } @@ -86,6 +88,30 @@ Connections are lazy: the first action that needs a server spawns its \ child process and runs the handshake. Idle servers are evicted after \ the configured TTL."; +async fn list_tools(manager: &McpRuntimeManager, server: &str) -> Result { + // Lazy connect per ADR §5.3 — idempotent if already Connected. + manager + .connect(server) + .await + .with_context(|| format!("connect mcp server {server:?}"))?; + let peer = manager.arc_peer(server).await?; + // Arc lets the I/O `.await` run with no runtime lock held. + let tools = peer + .list_all_tools() + .await + .with_context(|| format!("list_all_tools on {server:?}"))?; + let entries: Vec = tools + .into_iter() + .map(|t| { + json!({ + "name": t.name, + "description": t.description, + }) + }) + .collect(); + Ok(Value::Array(entries)) +} + async fn list_servers(manager: &McpRuntimeManager) -> Value { let snapshot = manager.snapshot().await; let entries: Vec = snapshot @@ -158,16 +184,34 @@ mod tests { assert!(result.as_array().unwrap().is_empty()); } + #[tokio::test] + async fn list_tools_propagates_connect_failure() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "broken": { + "type": "stdio", + "command": "/nonexistent/path/openab-mcp-test-stub-zzz" + } + } + }"#, + ); + let err = dispatch( + &mgr, + Action::ListTools { + server: "broken".into(), + }, + ) + .await + .unwrap_err() + .to_string(); + assert!(err.contains("connect mcp server"), "got: {err}"); + } + #[tokio::test] async fn unimplemented_actions_name_themselves_and_guide_fallback() { let mgr = mgr_from(r#"{"mcpServers":{}}"#); let cases = [ - ( - Action::ListTools { - server: "fs".into(), - }, - "list_tools", - ), ( Action::DescribeTool { server: "fs".into(), diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index a4828e899..5fa41c8d3 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -48,7 +48,11 @@ pub struct ServerHandle { pub name: String, pub config: 
ServerConfig, pub status: ServerStatus, - pub client: Option>, + /// `Arc` so foreground callers can clone a peer handle out under a + /// short read lock, drop the guard, and then run `peer.list_all_tools()` + /// / `peer.call_tool()` without holding any runtime lock across the + /// I/O `.await` (avoids writer starvation + `Future is not Send` traps). + pub client: Option>>, } impl std::fmt::Debug for ServerHandle { @@ -107,6 +111,26 @@ impl McpRuntimeManager { self.handles.read().await.is_empty() } + /// Clone the live MCP client handle for `name` out from under a short + /// read lock. The caller `.await`s on the returned `Arc` with no + /// runtime lock held, so background writers (idle eviction, new + /// `connect`s) are not starved by long-running tool calls. + /// + /// Errors if the server isn't configured or isn't currently + /// `Connected`. Callers that want lazy-connect should run + /// `connect(name)` first. + pub async fn arc_peer(&self, name: &str) -> Result>> { + let guard = self.handles.read().await; + let handle = guard + .get(name) + .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; + handle + .client + .as_ref() + .cloned() + .ok_or_else(|| anyhow!("mcp server {name:?} is not connected")) + } + /// Snapshot of `(name, status, transport_label)` sorted by name. Used /// by the `list_servers` meta-tool action; the static transport label /// avoids cloning the `Stdio { args, env, .. }` payload. 
@@ -161,7 +185,7 @@ impl McpRuntimeManager { match dial_result { Ok(client) => { handle.status = ServerStatus::Connected; - handle.client = Some(client); + handle.client = Some(Arc::new(client)); Ok(()) } Err(e) => { From 2c7f1c62457e51ec212030078570e634ae8d5ccb Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 20:43:54 +0000 Subject: [PATCH 23/54] feat(openab-agent/mcp): call action with lenient argument coercion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply Mira's Tick 18 review (3) — lenient args parsing for the `call` action so LLMs that send `arguments: null` (or omit the field entirely) for no-arg tools don't bounce off a strict type check. * `Action::Call { server, tool, arguments }` is wired through `call_tool()`: validate arguments → lazy `connect()` → `arc_peer()` → `peer.call_tool(CallToolRequestParams::new(...).with_arguments(map))` → serialize `CallToolResult` to JSON. * Argument coercion: `Value::Object(map) → map`, `Value::Null → {}`, everything else (string, number, array, bool) is rejected with a message that names the actual type so the LLM can correct itself. * Tests cover both branches: non-object string args fail early at validation; null args pass validation and fail later at connect. * `describe_tool` and `status` still stub; supported-list message updated to include `call`. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 104 ++++++++++++++++++++++++++---- 1 file changed, 93 insertions(+), 11 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 413cd73bf..850f2be27 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -48,6 +48,11 @@ pub async fn dispatch(manager: &McpRuntimeManager, action: Action) -> Result Ok(json!(HELP)), Action::ListServers => Ok(list_servers(manager).await), Action::ListTools { server } => list_tools(manager, &server).await, + Action::Call { + server, + tool, + arguments, + } => call_tool(manager, &server, &tool, arguments).await, other => Err(anyhow!("{}", not_implemented_msg(&other))), } } @@ -67,9 +72,9 @@ fn not_implemented_msg(action: &Action) -> String { }; format!( "mcp action '{name}' is not yet implemented (phase 1 scaffold). \ - Currently supported: 'help', 'list_servers', 'list_tools'. To complete \ - your task right now, fall back to the native agent tools (read, write, \ - edit, bash)." + Currently supported: 'help', 'list_servers', 'list_tools', 'call'. \ + To complete your task right now, fall back to the native agent tools \ + (read, write, edit, bash)." ) } @@ -88,6 +93,39 @@ Connections are lazy: the first action that needs a server spawns its \ child process and runs the handshake. Idle servers are evicted after \ the configured TTL."; +async fn call_tool( + manager: &McpRuntimeManager, + server: &str, + tool: &str, + arguments: Value, +) -> Result { + // Lenient arg coercion per Mira's Tick 18 review: LLMs often send + // `null` or omit `arguments` for no-arg tools; rejecting those would + // make zero-arg calls fragile. Only real type errors (string, number, + // array, bool) are refused. 
+ let args_map = match arguments { + Value::Object(map) => map, + Value::Null => serde_json::Map::new(), + other => { + return Err(anyhow!( + "mcp call arguments must be a JSON object (or null/omitted for no-arg tools), got {other}" + )); + } + }; + manager + .connect(server) + .await + .with_context(|| format!("connect mcp server {server:?}"))?; + let peer = manager.arc_peer(server).await?; + let params = rmcp::model::CallToolRequestParams::new(tool.to_string()) + .with_arguments(args_map); + let result = peer + .call_tool(params) + .await + .with_context(|| format!("call_tool {tool:?} on {server:?}"))?; + serde_json::to_value(&result).context("serialize CallToolResult") +} + async fn list_tools(manager: &McpRuntimeManager, server: &str) -> Result { // Lazy connect per ADR §5.3 — idempotent if already Connected. manager @@ -184,6 +222,58 @@ mod tests { assert!(result.as_array().unwrap().is_empty()); } + #[tokio::test] + async fn call_rejects_non_object_arguments() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "true" } + } + }"#, + ); + let err = dispatch( + &mgr, + Action::Call { + server: "fs".into(), + tool: "read".into(), + arguments: json!("oops, a string"), + }, + ) + .await + .unwrap_err() + .to_string(); + assert!(err.contains("must be a JSON object"), "got: {err}"); + } + + #[tokio::test] + async fn call_null_arguments_passes_validation_and_reaches_connect() { + // Null args should be coerced to {} and fail at the *connect* step + // (binary doesn't exist), not at the validation step. 
+ let mgr = mgr_from( + r#"{ + "mcpServers": { + "broken": { + "type": "stdio", + "command": "/nonexistent/openab-mcp-test-stub-zzz" + } + } + }"#, + ); + let err = dispatch( + &mgr, + Action::Call { + server: "broken".into(), + tool: "read".into(), + arguments: Value::Null, + }, + ) + .await + .unwrap_err() + .to_string(); + assert!(err.contains("connect mcp server"), "got: {err}"); + assert!(!err.contains("must be a JSON object"), "got: {err}"); + } + #[tokio::test] async fn list_tools_propagates_connect_failure() { let mgr = mgr_from( @@ -219,14 +309,6 @@ mod tests { }, "describe_tool", ), - ( - Action::Call { - server: "fs".into(), - tool: "read".into(), - arguments: json!({}), - }, - "call", - ), (Action::Status { server: None }, "status"), ]; for (action, expected_name) in cases { From 2a859e6a4f70467338f8e547166f610078d5582d Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 20:51:26 +0000 Subject: [PATCH 24/54] fix(openab-agent/mcp): collapse let-params chain to satisfy cargo fmt The CallToolRequestParams chain fits in exactly 100 chars; rustfmt keeps it on one line. Burned a tick on a defensive break. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 850f2be27..5b3d456ec 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -117,8 +117,7 @@ async fn call_tool( .await .with_context(|| format!("connect mcp server {server:?}"))?; let peer = manager.arc_peer(server).await?; - let params = rmcp::model::CallToolRequestParams::new(tool.to_string()) - .with_arguments(args_map); + let params = rmcp::model::CallToolRequestParams::new(tool.to_string()).with_arguments(args_map); let result = peer .call_tool(params) .await From 6653008ab5e5e003cc1c0896e25065b31b5db55c Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 21:13:30 +0000 Subject: [PATCH 25/54] feat(openab-agent/mcp): describe_tool + status actions complete phase 1 surface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit describe_tool returns full input_schema for one tool (list+filter via list_all_tools — MCP spec has no single-tool query). status reports per-server {name, status, transport, last_error} with optional name filter; surfaces Failed's embedded error as last_error. Extract fetch_tools helper shared by list_tools + describe_tool — this is the natural insertion point for the planned tools_cache. --- openab-agent/src/mcp/meta_tool.rs | 220 ++++++++++++++++++++++-------- 1 file changed, 165 insertions(+), 55 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 5b3d456ec..8f8503a3e 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -1,11 +1,8 @@ //! Single `mcp` meta-tool the LLM sees. See ADR §5.2 + §5.3. //! -//! Phase 1 scope: action enum + dispatch wiring + the two no-IO actions -//! (`help`, `list_servers`). The IO-bearing actions (`list_tools`, -//! 
`describe_tool`, `call`, `status`) return a `not yet implemented` -//! error so the contract surface is visible to callers while the -//! `RunningService` borrow path lands in the next slice. The Phase 2 -//! `login` / `complete_login` actions land with the OAuth slice. +//! Phase 1 scope: action enum + dispatch wiring + all six Phase 1 actions +//! (`help`, `list_servers`, `list_tools`, `describe_tool`, `call`, `status`). +//! The Phase 2 `login` / `complete_login` actions land with the OAuth slice. use anyhow::{anyhow, Context, Result}; use serde::Deserialize; @@ -48,36 +45,16 @@ pub async fn dispatch(manager: &McpRuntimeManager, action: Action) -> Result Ok(json!(HELP)), Action::ListServers => Ok(list_servers(manager).await), Action::ListTools { server } => list_tools(manager, &server).await, + Action::DescribeTool { server, tool } => describe_tool(manager, &server, &tool).await, Action::Call { server, tool, arguments, } => call_tool(manager, &server, &tool, arguments).await, - other => Err(anyhow!("{}", not_implemented_msg(&other))), + Action::Status { server } => Ok(status(manager, server.as_deref()).await), } } -/// Error body for actions whose handler hasn't landed yet. Mentions the -/// requested action and the supported set so the LLM can recover by -/// falling back to the native `read` / `write` / `edit` / `bash` tools -/// instead of retrying the same action blindly. -fn not_implemented_msg(action: &Action) -> String { - let name = match action { - Action::Help => "help", - Action::ListServers => "list_servers", - Action::ListTools { .. } => "list_tools", - Action::DescribeTool { .. } => "describe_tool", - Action::Call { .. } => "call", - Action::Status { .. } => "status", - }; - format!( - "mcp action '{name}' is not yet implemented (phase 1 scaffold). \ - Currently supported: 'help', 'list_servers', 'list_tools', 'call'. \ - To complete your task right now, fall back to the native agent tools \ - (read, write, edit, bash)." 
- ) -} - const HELP: &str = "\ The `mcp` tool lets you talk to configured MCP servers. @@ -125,19 +102,24 @@ async fn call_tool( serde_json::to_value(&result).context("serialize CallToolResult") } -async fn list_tools(manager: &McpRuntimeManager, server: &str) -> Result { - // Lazy connect per ADR §5.3 — idempotent if already Connected. +/// Lazy-connect + list all tools on `server`. Shared by `list_tools` / +/// `describe_tool` (and the planned `tools_cache` on ServerHandle will plug +/// in here). The `Arc` clone lets the I/O `.await` run with +/// no runtime lock held. +async fn fetch_tools(manager: &McpRuntimeManager, server: &str) -> Result> { manager .connect(server) .await .with_context(|| format!("connect mcp server {server:?}"))?; let peer = manager.arc_peer(server).await?; - // Arc lets the I/O `.await` run with no runtime lock held. - let tools = peer - .list_all_tools() + peer.list_all_tools() .await - .with_context(|| format!("list_all_tools on {server:?}"))?; - let entries: Vec = tools + .with_context(|| format!("list_all_tools on {server:?}")) +} + +async fn list_tools(manager: &McpRuntimeManager, server: &str) -> Result { + let entries: Vec = fetch_tools(manager, server) + .await? .into_iter() .map(|t| { json!({ @@ -149,6 +131,43 @@ async fn list_tools(manager: &McpRuntimeManager, server: &str) -> Result Ok(Value::Array(entries)) } +async fn describe_tool(manager: &McpRuntimeManager, server: &str, tool: &str) -> Result { + // Progressive disclosure (ADR §5.2): `list_tools` returns compact + // `{name, description}`; this action returns the full `input_schema` + // for one tool. MCP has no single-tool query, so we list + filter. + let tool_def = fetch_tools(manager, server) + .await? 
+ .into_iter() + .find(|t| t.name.as_ref() == tool) + .ok_or_else(|| anyhow!("no tool {tool:?} on mcp server {server:?}"))?; + Ok(json!({ + "name": tool_def.name, + "description": tool_def.description, + "input_schema": tool_def.input_schema, + })) +} + +async fn status(manager: &McpRuntimeManager, filter: Option<&str>) -> Value { + let snapshot = manager.snapshot().await; + let entries: Vec = snapshot + .into_iter() + .filter(|(name, _, _)| filter.map_or(true, |f| f == name.as_str())) + .map(|(name, status, transport)| { + let last_error = match &status { + ServerStatus::Failed(msg) => Some(msg.clone()), + _ => None, + }; + json!({ + "name": name, + "status": status_label(&status), + "transport": transport, + "last_error": last_error, + }) + }) + .collect(); + Value::Array(entries) +} + async fn list_servers(manager: &McpRuntimeManager) -> Value { let snapshot = manager.snapshot().await; let entries: Vec = snapshot @@ -298,29 +317,120 @@ mod tests { } #[tokio::test] - async fn unimplemented_actions_name_themselves_and_guide_fallback() { - let mgr = mgr_from(r#"{"mcpServers":{}}"#); - let cases = [ - ( - Action::DescribeTool { - server: "fs".into(), - tool: "read".into(), - }, - "describe_tool", - ), - (Action::Status { server: None }, "status"), - ]; - for (action, expected_name) in cases { - let err = dispatch(&mgr, action).await.unwrap_err().to_string(); - assert!(err.contains(expected_name), "missing action name: {err}"); - assert!(err.contains("not yet implemented"), "got: {err}"); - assert!( - err.contains("read, write, edit, bash"), - "missing fallback: {err}" - ); + async fn describe_tool_propagates_connect_failure() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "broken": { + "type": "stdio", + "command": "/nonexistent/path/openab-mcp-test-stub-zzz" + } + } + }"#, + ); + let err = dispatch( + &mgr, + Action::DescribeTool { + server: "broken".into(), + tool: "read".into(), + }, + ) + .await + .unwrap_err() + .to_string(); + assert!(err.contains("connect 
mcp server"), "got: {err}"); + } + + #[tokio::test] + async fn status_lists_each_server_with_null_last_error_by_default() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, + "linear": { "type": "http", "url": "https://mcp.linear.app/mcp" } + } + }"#, + ); + let result = dispatch(&mgr, Action::Status { server: None }).await.unwrap(); + let entries = result.as_array().unwrap(); + assert_eq!(entries.len(), 2); + for e in entries { + assert_eq!(e["status"], "disconnected"); + assert!(e["last_error"].is_null()); } } + #[tokio::test] + async fn status_filter_by_server_returns_single_entry() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, + "linear": { "type": "http", "url": "https://mcp.linear.app/mcp" } + } + }"#, + ); + let result = dispatch( + &mgr, + Action::Status { + server: Some("fs".into()), + }, + ) + .await + .unwrap(); + let entries = result.as_array().unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0]["name"], "fs"); + assert_eq!(entries[0]["transport"], "stdio"); + } + + #[tokio::test] + async fn status_unknown_filter_returns_empty_array() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" } + } + }"#, + ); + let result = dispatch( + &mgr, + Action::Status { + server: Some("nope".into()), + }, + ) + .await + .unwrap(); + assert!(result.as_array().unwrap().is_empty()); + } + + #[tokio::test] + async fn status_surfaces_last_error_after_failed_connect() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "broken": { + "type": "stdio", + "command": "/nonexistent/path/openab-mcp-test-stub-zzz" + } + } + }"#, + ); + let _ = dispatch( + &mgr, + Action::ListTools { + server: "broken".into(), + }, + ) + .await; + let result = dispatch(&mgr, Action::Status { server: None }).await.unwrap(); + let entries = result.as_array().unwrap(); + assert_eq!(entries.len(), 1); + 
assert_eq!(entries[0]["status"], "failed"); + let last_error = entries[0]["last_error"].as_str().unwrap(); + assert!(last_error.contains("spawn"), "got: {last_error}"); + } + #[test] fn action_deserializes_from_meta_tool_payload() { let payload = json!({ From f36fb77b350091bfb3b80688dd1d1621dadf08d9 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 21:14:54 +0000 Subject: [PATCH 26/54] fix(openab-agent/mcp): break dispatch().await.unwrap() chain past chain_width --- openab-agent/src/mcp/meta_tool.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 8f8503a3e..ad702dd46 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -351,7 +351,9 @@ mod tests { } }"#, ); - let result = dispatch(&mgr, Action::Status { server: None }).await.unwrap(); + let result = dispatch(&mgr, Action::Status { server: None }) + .await + .unwrap(); let entries = result.as_array().unwrap(); assert_eq!(entries.len(), 2); for e in entries { @@ -423,7 +425,9 @@ mod tests { }, ) .await; - let result = dispatch(&mgr, Action::Status { server: None }).await.unwrap(); + let result = dispatch(&mgr, Action::Status { server: None }) + .await + .unwrap(); let entries = result.as_array().unwrap(); assert_eq!(entries.len(), 1); assert_eq!(entries[0]["status"], "failed"); From 6a3e9e0c79003b6ff1e9a29803ae9f3d4730b0b9 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 21:21:49 +0000 Subject: [PATCH 27/54] fix(openab-agent/mcp): use is_none_or per clippy::unnecessary_map_or --- openab-agent/src/mcp/meta_tool.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index ad702dd46..2588c97bc 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -151,7 +151,7 @@ async fn status(manager: &McpRuntimeManager, filter: Option<&str>) -> 
Value { let snapshot = manager.snapshot().await; let entries: Vec = snapshot .into_iter() - .filter(|(name, _, _)| filter.map_or(true, |f| f == name.as_str())) + .filter(|(name, _, _)| filter.is_none_or(|f| f == name.as_str())) .map(|(name, status, transport)| { let last_error = match &status { ServerStatus::Failed(msg) => Some(msg.clone()), From 6e2b73683c49b6def9b939be2b8fa3d99fc72fca Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 21:44:05 +0000 Subject: [PATCH 28/54] feat(openab-agent/mcp): wire mcp meta-tool into agent dispatch loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaces the `mcp` action enum to the LLM via a new ToolDef and routes calls through McpRuntimeManager when configured. AcpServer constructs the manager once at startup (warn-and-default on config parse failure) and clones the cheap Arc-backed handle into each session's Agent. Tools.rs stays stateless and feature-flag-free — the mcp routing arm lives in Agent::execute_tool_call where it has access to the manager. A shared MCP_TOOL_NAME const keeps mcp_tool_def() and the dispatch arm in sync. 
--- openab-agent/src/acp.rs | 13 +++++- openab-agent/src/agent.rs | 71 +++++++++++++++++++++++++++---- openab-agent/src/mcp/meta_tool.rs | 2 - openab-agent/src/mcp/mod.rs | 58 ++++++++++++++++++++++++- 4 files changed, 132 insertions(+), 12 deletions(-) diff --git a/openab-agent/src/acp.rs b/openab-agent/src/acp.rs index 38054f25d..8529f7e82 100644 --- a/openab-agent/src/acp.rs +++ b/openab-agent/src/acp.rs @@ -1,5 +1,7 @@ use crate::agent::Agent; use crate::llm::AnthropicProvider; +#[cfg(feature = "mcp")] +use crate::mcp::{self, McpRuntimeManager}; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::collections::HashMap; @@ -35,6 +37,8 @@ pub struct AcpServer { // TODO(v0.2): add session TTL and periodic cleanup to prevent OOM sessions: HashMap, working_dir: String, + #[cfg(feature = "mcp")] + mcp_manager: McpRuntimeManager, } impl AcpServer { @@ -44,6 +48,8 @@ impl AcpServer { working_dir: std::env::current_dir() .map(|p| p.to_string_lossy().to_string()) .unwrap_or_else(|_| "/tmp".to_string()), + #[cfg(feature = "mcp")] + mcp_manager: mcp::load_runtime_or_warn(), } } @@ -154,7 +160,12 @@ impl AcpServer { } }; - let agent = Agent::new_boxed(provider, self.working_dir.clone()); + let agent = Agent::new_boxed( + provider, + self.working_dir.clone(), + #[cfg(feature = "mcp")] + Some(self.mcp_manager.clone()), + ); self.sessions.insert(session_id.clone(), agent); let resp = JsonRpcResponse { jsonrpc: "2.0", diff --git a/openab-agent/src/agent.rs b/openab-agent/src/agent.rs index b4a32d722..ad09eb63a 100644 --- a/openab-agent/src/agent.rs +++ b/openab-agent/src/agent.rs @@ -1,14 +1,18 @@ use anyhow::Result; +#[cfg(feature = "mcp")] +use serde::Deserialize; use std::path::PathBuf; use tracing::{debug, info}; use crate::llm::{ContentBlock, LlmEvent, LlmProvider, Message, ToolDef}; +#[cfg(feature = "mcp")] +use crate::mcp::{self, McpRuntimeManager}; use crate::skills; use crate::tools; const SYSTEM_PROMPT: &str = r#"You are openab-agent, a coding 
assistant. You help users by reading, writing, and editing files, and running shell commands. -You have 4 tools available: +You have these tools available: - read: Read file contents or list a directory - write: Create or overwrite a file - edit: Replace a string in a file (first occurrence) @@ -16,6 +20,12 @@ You have 4 tools available: Be direct and concise. Execute tasks immediately rather than explaining what you would do. When you need to understand code, read the relevant files first."#; +#[cfg(feature = "mcp")] +const MCP_SYSTEM_PROMPT_APPENDIX: &str = "\n\nAdditional tool:\n\ + - mcp: Talk to configured MCP servers. Always call `mcp(action=\"help\")` \ + first to learn the action surface, then `mcp(action=\"list_servers\")` to see \ + what's configured before calling tools."; + const MAX_TOOL_LOOPS: usize = 50; /// Maximum number of messages to keep in context. When exceeded, oldest /// messages (excluding the first user message) are dropped. @@ -27,45 +37,70 @@ pub struct Agent { working_dir: PathBuf, system_prompt: String, tools: Vec, + #[cfg(feature = "mcp")] + mcp_manager: Option, } impl Agent { #[cfg(test)] pub fn new(provider: impl LlmProvider + 'static, working_dir: String) -> Self { - let system_prompt = Self::build_system_prompt(&working_dir); + let system_prompt = Self::build_system_prompt(&working_dir, false); Self { provider: Box::new(provider), messages: Vec::new(), working_dir: PathBuf::from(working_dir), system_prompt, tools: tools::tool_definitions(), + #[cfg(feature = "mcp")] + mcp_manager: None, } } - pub fn new_boxed(provider: Box, working_dir: String) -> Self { - let system_prompt = Self::build_system_prompt(&working_dir); + pub fn new_boxed( + provider: Box, + working_dir: String, + #[cfg(feature = "mcp")] mcp_manager: Option, + ) -> Self { + #[cfg(feature = "mcp")] + let has_mcp = mcp_manager.is_some(); + #[cfg(not(feature = "mcp"))] + let has_mcp = false; + let system_prompt = Self::build_system_prompt(&working_dir, has_mcp); + let 
mut tools = tools::tool_definitions(); + #[cfg(feature = "mcp")] + if mcp_manager.is_some() { + tools.push(mcp::mcp_tool_def()); + } Self { provider, messages: Vec::new(), working_dir: PathBuf::from(working_dir), system_prompt, - tools: tools::tool_definitions(), + tools, + #[cfg(feature = "mcp")] + mcp_manager, } } /// Run the agent with a user prompt, executing tool calls until completion. /// Returns the final text response. - fn build_system_prompt(working_dir: &str) -> String { + #[cfg_attr(not(feature = "mcp"), allow(unused_variables))] + fn build_system_prompt(working_dir: &str, mcp_enabled: bool) -> String { let wd = std::path::Path::new(working_dir); let agents_md = wd.join("AGENTS.md"); let custom = std::fs::read_to_string(&agents_md).unwrap_or_default(); - let base = if custom.is_empty() { + let mut base = if custom.is_empty() { SYSTEM_PROMPT.to_string() } else { format!("{}\n\n---\n\n{}", custom.trim(), SYSTEM_PROMPT) }; + #[cfg(feature = "mcp")] + if mcp_enabled { + base.push_str(MCP_SYSTEM_PROMPT_APPENDIX); + } + let discovered = skills::discover_skills(wd); if discovered.is_empty() { base @@ -140,7 +175,7 @@ impl Agent { let mut tool_results: Vec = Vec::new(); for (id, name, input) in &tool_calls { info!("executing tool: {name}"); - let result = tools::execute_tool(name, input, &self.working_dir).await; + let result = self.execute_tool_call(name, input).await; match result { Ok(output) => { tool_results.push(ContentBlock::ToolResult { @@ -184,6 +219,26 @@ impl Agent { } } + /// Route the `mcp` meta-tool to the MCP runtime when configured; + /// everything else goes to the stateless `tools::execute_tool`. Keeping + /// the routing here (rather than inside `tools.rs`) lets `tools.rs` stay + /// stateless and free of MCP/feature plumbing. 
+ async fn execute_tool_call(&self, name: &str, input: &serde_json::Value) -> Result { + #[cfg(feature = "mcp")] + if name == mcp::MCP_TOOL_NAME { + let Some(manager) = self.mcp_manager.as_ref() else { + return Err(anyhow::anyhow!( + "mcp tool invoked but no McpRuntimeManager configured" + )); + }; + let action = mcp::meta_tool::Action::deserialize(input) + .map_err(|e| anyhow::anyhow!("invalid mcp action payload: {e}"))?; + let value = mcp::meta_tool::dispatch(manager, action).await?; + return Ok(serde_json::to_string(&value)?); + } + tools::execute_tool(name, input, &self.working_dir).await + } + async fn call_llm(&self) -> Result> { self.provider .chat(&self.system_prompt, &self.messages, &self.tools) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 2588c97bc..557badf4c 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -12,7 +12,6 @@ use super::runtime::{McpRuntimeManager, ServerStatus}; /// Deserialized form of the meta-tool's input JSON (ADR §5.2). The LLM /// sends `{ "action": "...", ... }`; `tag = "action"` routes by that field. -#[allow(dead_code)] // wired into agent.rs execute_tool dispatch in the next slice #[derive(Debug, Deserialize)] #[serde(tag = "action", rename_all = "snake_case")] pub enum Action { @@ -39,7 +38,6 @@ pub enum Action { /// Entry point — the LLM tool dispatcher hands us a deserialized `Action` /// and we return the JSON payload that becomes the tool result. 
-#[allow(dead_code)] // wired into agent.rs execute_tool dispatch in the next slice pub async fn dispatch(manager: &McpRuntimeManager, action: Action) -> Result { match action { Action::Help => Ok(json!(HELP)), diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index b7b7b8f78..2bd9dadee 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -4,8 +4,52 @@ pub mod config; pub mod meta_tool; pub mod runtime; +use serde_json::json; + +use crate::llm::ToolDef; use config::{McpConfig, ServerConfig}; -use runtime::McpRuntimeManager; + +pub use runtime::McpRuntimeManager; + +/// Shared tool name used by `mcp_tool_def()` and the agent dispatch arm — +/// keeps the implicit contract between the two call sites explicit. +pub const MCP_TOOL_NAME: &str = "mcp"; + +/// The single `mcp` tool definition the LLM sees (ADR §5.2). The schema is +/// intentionally permissive on the per-action fields — the LLM should call +/// `mcp(action="help")` first to learn the action-specific contract. +pub fn mcp_tool_def() -> ToolDef { + ToolDef { + name: MCP_TOOL_NAME.to_string(), + description: "Talk to configured MCP servers. Call with \ + {action: 'help'} first to see the available actions \ + (help, list_servers, list_tools, describe_tool, call, status)." 
+ .to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["help", "list_servers", "list_tools", + "describe_tool", "call", "status"], + "description": "Which meta-tool action to invoke" + }, + "server": { + "type": "string", + "description": "Server name (required by list_tools / describe_tool / call; optional filter for status)" + }, + "tool": { + "type": "string", + "description": "Tool name on the server (required by describe_tool / call)" + }, + "arguments": { + "description": "Tool arguments for call — JSON object, or null/omitted for no-arg tools" + } + }, + "required": ["action"] + }), + } +} fn load_config_or_exit() -> McpConfig { McpConfig::load().unwrap_or_else(|e| { @@ -14,6 +58,18 @@ fn load_config_or_exit() -> McpConfig { }) } +/// Construct an `McpRuntimeManager` from on-disk config, falling back to an +/// empty manager (with a `tracing::warn!`) on parse failure. Long-running +/// servers (ACP, future HTTP) call this so a malformed `mcp.json` cannot +/// kill the host process — CLI subcommands use `load_config_or_exit` instead. +pub fn load_runtime_or_warn() -> McpRuntimeManager { + let cfg = McpConfig::load().unwrap_or_else(|e| { + tracing::warn!("mcp config failed to load, starting with no servers: {e:#}"); + McpConfig::default() + }); + McpRuntimeManager::from_config(cfg) +} + /// `openab-agent mcp list [--resolve]`. /// /// Default: print configs verbatim (`${env:VAR}` placeholders kept as-is) so From 6e08c284102c4a84dc9348fcfba96f7a277d2612 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 21:45:33 +0000 Subject: [PATCH 29/54] fix(openab-agent/mcp): silence unused_mut on feature-off build The `mut` on `tools` and `base` is consumed only inside `#[cfg(feature = "mcp")]` blocks, so the no-feature build saw them as unused. Gate `allow(unused_mut)` behind `cfg_attr(not(feature = "mcp"), ...)` so both builds stay clean. 
--- openab-agent/src/agent.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openab-agent/src/agent.rs b/openab-agent/src/agent.rs index ad09eb63a..419e91f72 100644 --- a/openab-agent/src/agent.rs +++ b/openab-agent/src/agent.rs @@ -66,6 +66,7 @@ impl Agent { #[cfg(not(feature = "mcp"))] let has_mcp = false; let system_prompt = Self::build_system_prompt(&working_dir, has_mcp); + #[cfg_attr(not(feature = "mcp"), allow(unused_mut))] let mut tools = tools::tool_definitions(); #[cfg(feature = "mcp")] if mcp_manager.is_some() { @@ -90,6 +91,7 @@ impl Agent { let agents_md = wd.join("AGENTS.md"); let custom = std::fs::read_to_string(&agents_md).unwrap_or_default(); + #[cfg_attr(not(feature = "mcp"), allow(unused_mut))] let mut base = if custom.is_empty() { SYSTEM_PROMPT.to_string() } else { From a0ebd5f4d2b3ee37f460c622814d93321c8bfa20 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 21:53:28 +0000 Subject: [PATCH 30/54] fix(openab-agent/acp): serialize env-var-touching tests with Mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `test_session_new` and `test_session_new_missing_key` both mutate `ANTHROPIC_API_KEY`. cargo runs tests in parallel, so a `set` from one thread can be observed by the other before the first reads it. The pre-existing race was timing-tight enough to pass historically; adding `mcp::load_runtime_or_warn()` to `AcpServer::new()` widened the gap between set and read enough to flip the race deterministic. Guarding both tests with a shared `Mutex<()>` serializes them without adding a `serial_test` dep. Poisoned guard is recovered via `into_inner()` — we don't care if a prior assertion panicked. 
--- openab-agent/src/acp.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/openab-agent/src/acp.rs b/openab-agent/src/acp.rs index 8529f7e82..3556b3571 100644 --- a/openab-agent/src/acp.rs +++ b/openab-agent/src/acp.rs @@ -256,6 +256,13 @@ impl AcpServer { #[cfg(test)] mod tests { use super::*; + use std::sync::Mutex; + + /// Serializes tests that mutate process-global env vars (notably + /// `ANTHROPIC_API_KEY`). Without this, `test_session_new` and + /// `test_session_new_missing_key` race on the same key when run in + /// parallel — set/remove from one thread is observed by the other. + static ENV_LOCK: Mutex<()> = Mutex::new(()); #[test] fn test_initialize_response() { @@ -270,6 +277,7 @@ mod tests { #[test] fn test_session_new() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); // Set a fake key so from_env() succeeds in CI unsafe { std::env::set_var("ANTHROPIC_API_KEY", "test-key") }; let mut server = AcpServer::new(); @@ -282,6 +290,7 @@ mod tests { #[test] fn test_session_new_missing_key() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); // Ensure no OAuth token exists either let auth_path = std::path::PathBuf::from(std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string())) From f405646b7aa0137179db33ff39dbd1e53c6ff027 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:04:24 +0000 Subject: [PATCH 31/54] refactor(openab-agent/mcp): apply Mira's Phase 1 polish review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-ups from Mira's Tick 24 review: 1. `load_runtime_or_warn()` now returns `Option` — `None` when `cfg.servers` is empty so callers skip the entire MCP path (saves system-prompt tokens; LLM doesn't see an empty tool surface and try to use it). `AcpServer.mcp_manager` shape follows. 2. 
Replaced `let mut + push_str` on `base` with shadowing via `#[cfg(feature = "mcp")] let base = ...` — drops the `cfg_attr(allow(unused_mut))` markers entirely. The `tools` Vec uses two cfg arms (a single shared `let mut t = ...` would still trigger `unused_mut` on no-feature builds because the only write is cfg-gated). 3. Reordered the system-prompt appendix to lead with `list_servers` instead of `help`. Saves the LLM one tool-call round-trip when the user's request requires picking a configured server. --- openab-agent/src/acp.rs | 4 ++-- openab-agent/src/agent.rs | 35 +++++++++++++++++++++-------------- openab-agent/src/mcp/mod.rs | 18 ++++++++++++------ 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/openab-agent/src/acp.rs b/openab-agent/src/acp.rs index 3556b3571..5d7f4c412 100644 --- a/openab-agent/src/acp.rs +++ b/openab-agent/src/acp.rs @@ -38,7 +38,7 @@ pub struct AcpServer { sessions: HashMap, working_dir: String, #[cfg(feature = "mcp")] - mcp_manager: McpRuntimeManager, + mcp_manager: Option, } impl AcpServer { @@ -164,7 +164,7 @@ impl AcpServer { provider, self.working_dir.clone(), #[cfg(feature = "mcp")] - Some(self.mcp_manager.clone()), + self.mcp_manager.clone(), ); self.sessions.insert(session_id.clone(), agent); let resp = JsonRpcResponse { diff --git a/openab-agent/src/agent.rs b/openab-agent/src/agent.rs index 419e91f72..63f240d61 100644 --- a/openab-agent/src/agent.rs +++ b/openab-agent/src/agent.rs @@ -22,9 +22,10 @@ Be direct and concise. Execute tasks immediately rather than explaining what you #[cfg(feature = "mcp")] const MCP_SYSTEM_PROMPT_APPENDIX: &str = "\n\nAdditional tool:\n\ - - mcp: Talk to configured MCP servers. Always call `mcp(action=\"help\")` \ - first to learn the action surface, then `mcp(action=\"list_servers\")` to see \ - what's configured before calling tools."; + - mcp: Talk to configured MCP servers. 
Call `mcp(action=\"list_servers\")` \ + to see what's configured, then `mcp(action=\"list_tools\", server=...)` to \ + discover per-server tools. Use `mcp(action=\"help\")` only if action shapes \ + are unclear."; const MAX_TOOL_LOOPS: usize = 50; /// Maximum number of messages to keep in context. When exceeded, oldest @@ -66,12 +67,16 @@ impl Agent { #[cfg(not(feature = "mcp"))] let has_mcp = false; let system_prompt = Self::build_system_prompt(&working_dir, has_mcp); - #[cfg_attr(not(feature = "mcp"), allow(unused_mut))] - let mut tools = tools::tool_definitions(); #[cfg(feature = "mcp")] - if mcp_manager.is_some() { - tools.push(mcp::mcp_tool_def()); - } + let tools = { + let mut t = tools::tool_definitions(); + if mcp_manager.is_some() { + t.push(mcp::mcp_tool_def()); + } + t + }; + #[cfg(not(feature = "mcp"))] + let tools = tools::tool_definitions(); Self { provider, messages: Vec::new(), @@ -85,23 +90,25 @@ impl Agent { /// Run the agent with a user prompt, executing tool calls until completion. /// Returns the final text response. 
- #[cfg_attr(not(feature = "mcp"), allow(unused_variables))] fn build_system_prompt(working_dir: &str, mcp_enabled: bool) -> String { + #[cfg(not(feature = "mcp"))] + let _ = mcp_enabled; let wd = std::path::Path::new(working_dir); let agents_md = wd.join("AGENTS.md"); let custom = std::fs::read_to_string(&agents_md).unwrap_or_default(); - #[cfg_attr(not(feature = "mcp"), allow(unused_mut))] - let mut base = if custom.is_empty() { + let base = if custom.is_empty() { SYSTEM_PROMPT.to_string() } else { format!("{}\n\n---\n\n{}", custom.trim(), SYSTEM_PROMPT) }; #[cfg(feature = "mcp")] - if mcp_enabled { - base.push_str(MCP_SYSTEM_PROMPT_APPENDIX); - } + let base = if mcp_enabled { + format!("{base}{MCP_SYSTEM_PROMPT_APPENDIX}") + } else { + base + }; let discovered = skills::discover_skills(wd); if discovered.is_empty() { diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 2bd9dadee..55f210c16 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -58,16 +58,22 @@ fn load_config_or_exit() -> McpConfig { }) } -/// Construct an `McpRuntimeManager` from on-disk config, falling back to an -/// empty manager (with a `tracing::warn!`) on parse failure. Long-running -/// servers (ACP, future HTTP) call this so a malformed `mcp.json` cannot -/// kill the host process — CLI subcommands use `load_config_or_exit` instead. -pub fn load_runtime_or_warn() -> McpRuntimeManager { +/// Construct an `McpRuntimeManager` from on-disk config — returns `None` +/// when no servers are configured so callers can skip the entire MCP path +/// (saves system-prompt tokens + keeps the LLM from hallucinating an empty +/// tool surface). Parse failure falls back to `None` with a `tracing::warn!`. +/// Long-running servers (ACP, future HTTP) call this; CLI subcommands use +/// `load_config_or_exit` instead. 
+pub fn load_runtime_or_warn() -> Option { let cfg = McpConfig::load().unwrap_or_else(|e| { tracing::warn!("mcp config failed to load, starting with no servers: {e:#}"); McpConfig::default() }); - McpRuntimeManager::from_config(cfg) + if cfg.servers.is_empty() { + None + } else { + Some(McpRuntimeManager::from_config(cfg)) + } } /// `openab-agent mcp list [--resolve]`. From 9aa51a13166ce23eb8880149ac01fc62b3172b8e Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:19:07 +0000 Subject: [PATCH 32/54] feat(openab-agent/mcp): wire anonymous Streamable HTTP transport MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 first slice (ADR §5.7, §6). HTTP servers without `oauth` now dial through rmcp's `StreamableHttpClientTransport::from_uri`; servers that DO declare `oauth` are rejected before the `Connecting` state transition so the runtime never advertises an attempt that wasn't made. The OAuth-protected reject path makes the gap between Phase 2-anon and Phase 2-auth explicit — `mcp login` lands in the next slice. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/runtime.rs | 102 ++++++++++++++++++++++++-------- 1 file changed, 78 insertions(+), 24 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 5fa41c8d3..933fd23d5 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use anyhow::{anyhow, Context, Result}; use rmcp::service::{RoleClient, RunningService}; -use rmcp::transport::{ConfigureCommandExt, TokioChildProcess}; +use rmcp::transport::{ConfigureCommandExt, StreamableHttpClientTransport, TokioChildProcess}; use rmcp::ServiceExt; use tokio::process::Command; use tokio::sync::RwLock; @@ -147,7 +147,8 @@ impl McpRuntimeManager { } /// Lazy-connect the named server (ADR §5.7). Idempotent if already - /// `Connected` with a live client. HTTP transport is Phase 2. 
+ /// `Connected` with a live client. HTTP servers requiring OAuth are + /// rejected until the Phase 2 auth slice lands (ADR §6). pub async fn connect(&self, name: &str) -> Result<()> { let dial = { let mut guard = self.handles.write().await; @@ -161,10 +162,21 @@ impl McpRuntimeManager { let dial = match resolved { ServerConfig::Stdio { command, args, env, .. - } => StdioDial { command, args, env }, - ServerConfig::Http { .. } => { - return Err(anyhow!("http transport lands in phase 2 (server {name:?})")); + } => Dial::Stdio { command, args, env }, + // Reject oauth-protected servers BEFORE the `Connecting` + // transition: we never attempted a handshake, so leaving + // status at `Disconnected` is the honest state. Status + // becomes `Failed` only when a dial was actually tried. + ServerConfig::Http { + oauth: Some(_), + url, + .. + } => { + return Err(anyhow!( + "oauth-protected http server {url:?} requires the auth slice (Phase 2 §6)" + )); } + ServerConfig::Http { url, .. } => Dial::Http { url }, }; handle.status = ServerStatus::Connecting; dial @@ -197,24 +209,41 @@ impl McpRuntimeManager { } } -struct StdioDial { - command: String, - args: Vec, - env: HashMap, +/// Per-transport dial parameters, extracted under the manager's write lock +/// then dialed without holding the lock. Flat (no nested `*Dial` structs) +/// because two variants don't warrant a dispatch enum. 
+enum Dial { + Stdio { + command: String, + args: Vec, + env: HashMap, + }, + Http { + url: String, + }, } -impl StdioDial { +impl Dial { async fn run(self) -> Result> { - let Self { command, args, env } = self; - let cmd = Command::new(&command).configure(|c| { - c.args(&args); - c.envs(&env); - }); - let transport = TokioChildProcess::new(cmd) - .with_context(|| format!("spawn mcp child process {command:?}"))?; - ().serve(transport) - .await - .with_context(|| format!("mcp handshake with {command:?}")) + match self { + Dial::Stdio { command, args, env } => { + let cmd = Command::new(&command).configure(|c| { + c.args(&args); + c.envs(&env); + }); + let transport = TokioChildProcess::new(cmd) + .with_context(|| format!("spawn mcp child process {command:?}"))?; + ().serve(transport) + .await + .with_context(|| format!("mcp handshake with {command:?}")) + } + Dial::Http { url } => { + let transport = StreamableHttpClientTransport::from_uri(url.as_str()); + ().serve(transport) + .await + .with_context(|| format!("mcp handshake with {url:?}")) + } + } } } @@ -269,20 +298,45 @@ mod tests { } #[tokio::test] - async fn connect_http_returns_phase2_error() { + async fn connect_http_with_oauth_defers_to_auth_slice() { let json = r#"{ "mcpServers": { - "linear": { "type": "http", "url": "https://mcp.linear.app/mcp" } + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear" } + } } }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); let err = mgr.connect("linear").await.unwrap_err().to_string(); - assert!(err.contains("phase 2"), "expected 'phase 2' in {err}"); - // Status not advanced past Disconnected for unsupported transports. + assert!(err.contains("oauth"), "expected 'oauth' in {err}"); + // OAuth rejection happens BEFORE the Connecting transition, so the + // server remains Disconnected — no dial was attempted. 
assert_eq!(mgr.statuses().await[0].1, ServerStatus::Disconnected); } + #[tokio::test] + async fn connect_http_anonymous_to_dead_address_records_failed() { + // 127.0.0.1:1 is a TCP port that no MCP server will ever bind. The + // handshake `.serve()` future fails fast at the connect() syscall, + // so this test stays hermetic — no network reachability assumed. + let json = r#"{ + "mcpServers": { + "dead": { "type": "http", "url": "http://127.0.0.1:1/mcp" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = mgr.connect("dead").await.unwrap_err().to_string(); + assert!(err.contains("handshake"), "expected 'handshake' in {err}"); + match &mgr.statuses().await[0].1 { + ServerStatus::Failed(_) => {} + other => panic!("expected Failed, got {other:?}"), + } + } + #[tokio::test] async fn connect_to_missing_binary_records_failed() { let json = r#"{ From 2a69bf9d9933c4cec69b48a53bf3dd2e1945aa4a Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:25:42 +0000 Subject: [PATCH 33/54] =?UTF-8?q?feat(openab-agent/auth):=20namespaced=20T?= =?UTF-8?q?okenStore=20+=20fsync=20(ADR=20=C2=A76.1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 §6.1 foundation. `auth.json` switches from a bare `TokenStore` to `HashMap<String, TokenStore>` so MCP server credentials can sit alongside the existing Codex slot (`mcp:<server>` vs `codex`). Legacy single-tenant files migrate transparently on read (discriminated by the top-level `access_token` key); the on-disk shape rewrites to the new layout on the next save. Codex login flows keep their existing public API — `load_tokens`/`save_tokens` now route through the codex namespace internally. `fsync(2)` lands on every write per the refresh-token rotation race contract: without it, a Spot interruption between local write and S3 sync could restore a revoked refresh token from durable storage.
Public helpers for the MCP path (`load_/save_/remove_namespaced_token`) are feature-gated and `#[allow(dead_code)]` until the `mcp/oauth.rs` slice lands. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 173 +++++++++++++++++++++++++++++++++++---- 1 file changed, 158 insertions(+), 15 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 385ccede9..f1abacc76 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -2,11 +2,16 @@ use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; +use std::collections::HashMap; use std::io::{BufRead, Write}; use std::net::TcpListener; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::time::{SystemTime, UNIX_EPOCH}; +/// Namespace key for the existing Codex single-tenant credential. +/// Lives next to future `mcp:` entries inside `auth.json`. +const CODEX_NAMESPACE: &str = "codex"; + const REFRESH_SKEW_SECONDS: u64 = 120; const CODEX_AUTHORIZE_URL: &str = "https://auth.openai.com/oauth/authorize"; @@ -42,23 +47,36 @@ fn auth_path() -> PathBuf { .join("auth.json") } -pub fn load_tokens() -> Result { - let path = auth_path(); - let data = std::fs::read_to_string(&path).map_err(|_| { - anyhow!( - "No credentials found at {}. Run `openab-agent auth codex-oauth` first.", - path.display() - ) - })?; - serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}")) +/// Read the `auth.json` map, transparently migrating a legacy single-tenant +/// Codex token file into the new namespaced shape. The migrated map is held +/// in-memory only; the file is rewritten in the new shape on the next save. +/// +/// Discriminates by the top-level `access_token` key — present means the +/// file is the legacy `TokenStore` shape, absent means the new namespaced +/// map. A single JSON parse gives accurate error context either way. 
+fn read_auth_file(path: &Path) -> Result> { + let data = std::fs::read_to_string(path)?; + let value: serde_json::Value = + serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}"))?; + if value.get("access_token").is_some() { + let legacy: TokenStore = serde_json::from_value(value) + .map_err(|e| anyhow!("Invalid auth.json (legacy format): {e}"))?; + let mut map = HashMap::new(); + map.insert(CODEX_NAMESPACE.to_string(), legacy); + return Ok(map); + } + serde_json::from_value(value).map_err(|e| anyhow!("Invalid auth.json: {e}")) } -fn save_tokens(store: &TokenStore) -> Result<()> { - let path = auth_path(); +/// Atomically replace `auth.json` with the new map. `fsync(2)` after write +/// satisfies the ADR §6.1 refresh-token rotation contract — without it, a +/// Spot interruption between local write and S3 sync would restore a +/// revoked refresh token from durable storage on the next task start. +fn write_auth_file(path: &Path, map: &HashMap) -> Result<()> { if let Some(dir) = path.parent() { std::fs::create_dir_all(dir)?; } - let data = serde_json::to_string_pretty(store)?; + let data = serde_json::to_string_pretty(map)?; #[cfg(unix)] { use std::fs::OpenOptions; @@ -69,16 +87,87 @@ fn save_tokens(store: &TokenStore) -> Result<()> { .create(true) .truncate(true) .mode(0o600) - .open(&path)?; + .open(path)?; file.write_all(data.as_bytes())?; + file.sync_all()?; } #[cfg(not(unix))] { - std::fs::write(&path, &data)?; + std::fs::write(path, &data)?; } Ok(()) } +pub fn load_tokens() -> Result { + let path = auth_path(); + let map = read_auth_file(&path).map_err(|_| { + anyhow!( + "No credentials found at {}. Run `openab-agent auth codex-oauth` first.", + path.display() + ) + })?; + map.get(CODEX_NAMESPACE).cloned().ok_or_else(|| { + anyhow!( + "No codex credentials in {}. 
Run `openab-agent auth codex-oauth` first.", + path.display() + ) + }) +} + +fn save_tokens(store: &TokenStore) -> Result<()> { + let path = auth_path(); + let mut map = read_auth_file(&path).unwrap_or_default(); + map.insert(CODEX_NAMESPACE.to_string(), store.clone()); + write_auth_file(&path, &map) +} + +/// Look up the credential at `key` (e.g. `mcp:linear`). Returns the codex +/// entry for `key = "codex"`, but prefer `load_tokens()` for that path — +/// this helper exists for MCP server-namespaced lookups (ADR §6.1). +#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) +pub fn load_namespaced_token(key: &str) -> Result { + let path = auth_path(); + let map = read_auth_file(&path) + .map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + map.get(key) + .cloned() + .ok_or_else(|| anyhow!("no credentials stored for {key:?}")) +} + +/// Insert or replace the credential at `key`, preserving all other entries. +/// Read-modify-write on a single file: callers in the same process must +/// serialize themselves (the lifecycle manager already does per ADR §5.7). +#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) +pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { + let path = auth_path(); + let mut map = read_auth_file(&path).unwrap_or_default(); + map.insert(key.to_string(), store.clone()); + write_auth_file(&path, &map) +} + +/// Remove the credential at `key`. Idempotent — missing key is not an +/// error. If the map becomes empty, the file is deleted so `mcp doctor` +/// can report "no credentials" instead of "empty file". 
+#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (mcp logout / revoked-refresh recovery) +pub fn remove_namespaced_token(key: &str) -> Result<()> { + let path = auth_path(); + let mut map = match read_auth_file(&path) { + Ok(m) => m, + Err(_) => return Ok(()), + }; + if map.remove(key).is_none() { + return Ok(()); + } + if map.is_empty() { + let _ = std::fs::remove_file(&path); + return Ok(()); + } + write_auth_file(&path, &map) +} + fn is_expired(store: &TokenStore) -> bool { let now = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -535,4 +624,58 @@ mod tests { let expected = URL_SAFE_NO_PAD.encode(Sha256::digest(verifier.as_bytes())); assert_eq!(challenge, expected); } + + #[test] + fn read_auth_file_migrates_legacy_single_tenant_format() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let legacy = serde_json::to_string_pretty(&make_store(9_999_999_999)).unwrap(); + std::fs::write(&path, legacy).unwrap(); + let map = read_auth_file(&path).unwrap(); + assert_eq!(map.len(), 1); + assert_eq!( + map.get(CODEX_NAMESPACE).unwrap().access_token, + "test_access_token_value" + ); + } + + #[test] + fn read_auth_file_parses_new_namespaced_format() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("codex".to_string(), make_store(1)); + input.insert("mcp:linear".to_string(), make_store(2)); + write_auth_file(&path, &input).unwrap(); + let map = read_auth_file(&path).unwrap(); + assert_eq!(map.len(), 2); + assert_eq!(map.get("codex").unwrap().expires_at, 1); + assert_eq!(map.get("mcp:linear").unwrap().expires_at, 2); + } + + #[test] + fn write_auth_file_round_trips_through_disk() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("mcp:github".to_string(), make_store(42)); + write_auth_file(&path, &input).unwrap(); + let raw = 
std::fs::read_to_string(&path).unwrap(); + assert!(raw.contains("mcp:github")); + let map = read_auth_file(&path).unwrap(); + assert_eq!(map.get("mcp:github").unwrap().expires_at, 42); + } + + #[cfg(unix)] + #[test] + fn write_auth_file_creates_file_with_0600_mode() { + use std::os::unix::fs::PermissionsExt; + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("codex".to_string(), make_store(0)); + write_auth_file(&path, &input).unwrap(); + let mode = std::fs::metadata(&path).unwrap().permissions().mode() & 0o777; + assert_eq!(mode, 0o600, "expected 0600, got {mode:o}"); + } } From 06acde6ac330f4a7400e3bfb8754e83f312e6ba1 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:31:46 +0000 Subject: [PATCH 34/54] fix(openab-agent/auth): rustfmt break-after-= for long chained let MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI cargo fmt --check rejected the chain-break form (`let map = read_auth_file(&path)\n .map_err(...)`) — for a single-method chain that fits inline at 99 chars, rustfmt prefers breaking after the `=` and keeping the chain whole. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index f1abacc76..b83179e15 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -128,8 +128,8 @@ fn save_tokens(store: &TokenStore) -> Result<()> { #[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) pub fn load_namespaced_token(key: &str) -> Result { let path = auth_path(); - let map = read_auth_file(&path) - .map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + let map = + read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; map.get(key) .cloned() .ok_or_else(|| anyhow!("no credentials stored for {key:?}")) From 4c77595003b0b7e6b4f12ac5c33a6cda6732036f Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:46:43 +0000 Subject: [PATCH 35/54] =?UTF-8?q?feat(openab-agent/mcp):=20add=20OAuth=20p?= =?UTF-8?q?rovider=20catalog=20(ADR=20=C2=A76.2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Built-in ProviderSpec table + resolve() that hands callers a (spec, scopes) pair. Empty cfg.scopes falls back to spec defaults; non-empty replaces them entirely. Custom providers (§6.3) deferred — anything not in the built-in list errors out for now. Module-level #![allow(dead_code)] since the first prod caller is the §6.4 login flow slice; until then only the unit tests below exercise this code. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/mod.rs | 1 + openab-agent/src/mcp/oauth.rs | 139 ++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 openab-agent/src/mcp/oauth.rs diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 55f210c16..17884c9c1 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -2,6 +2,7 @@ pub mod config; pub mod meta_tool; +pub mod oauth; pub mod runtime; use serde_json::json; diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs new file mode 100644 index 000000000..e05f7d77e --- /dev/null +++ b/openab-agent/src/mcp/oauth.rs @@ -0,0 +1,139 @@ +//! OAuth provider catalog (ADR §6.2). Wiring into the rmcp Streamable HTTP +//! transport + agent-guided flows (§6.4) lands in subsequent slices; this +//! module is the data layer the login / refresh code will dispatch through. +//! +//! Scopes are stored as `&'static [&'static str]` so callers can join them +//! with the space delimiter the OAuth 2.1 spec mandates without owning a +//! `Vec`. Per-server overrides (`OAuthConfig.scopes`) replace the defaults +//! and pay for a `Vec` at the boundary. + +// The §6.4 login slice is the first prod caller — until then, every item +// here is reachable only via the unit tests below, so `cargo clippy +// --features mcp -- -D warnings` would flag them as dead. Module-scope +// allow rather than per-item once that slice lands. +#![allow(dead_code)] + +use anyhow::{anyhow, Result}; + +use super::config::OAuthConfig; + +/// Static description of a single OAuth provider — URLs + the loopback +/// redirect the §6.4 browser flow listens on. `default_scopes` is the +/// minimum set the agent will request when `oauth.scopes` is omitted +/// from the server config; per-server overrides win when present. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ProviderSpec { + pub authorize_url: &'static str, + pub token_url: &'static str, + pub callback: &'static str, + pub default_scopes: &'static [&'static str], +} + +/// Anthropic MCP (claude.ai). Scope list from ADR §6.2 — `org:create_api_key` +/// is the broadest grant; consumers should narrow via per-server overrides. +pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { + authorize_url: "https://claude.ai/oauth/authorize", + token_url: "https://platform.claude.com/v1/oauth/token", + callback: "http://localhost:53692/callback", + default_scopes: &[ + "org:create_api_key", + "user:profile", + "user:inference", + "user:sessions:claude_code", + "user:mcp_servers", + "user:file_upload", + ], +}; + +/// Look up a built-in `ProviderSpec` by config name. Returns `None` for +/// custom providers (handled by §6.3 once `OAuthConfig` grows the URL +/// fields) and for unknown names. +pub fn builtin(name: &str) -> Option { + match name { + "anthropic-mcp" => Some(ANTHROPIC_MCP), + _ => None, + } +} + +/// Resolve a server's `oauth:` block to a `ProviderSpec` plus the effective +/// scope list. `OAuthConfig::scopes`, when non-empty, replaces the spec's +/// defaults entirely — the caller never needs to merge. +/// +/// Custom providers (per ADR §6.3) require `OAuthConfig` to grow +/// `authorize_url` / `token_url` fields; until that lands, an `oauth:` +/// block without a known `provider` is an error. 
+pub fn resolve(cfg: &OAuthConfig) -> Result<(ProviderSpec, Vec)> { + let provider = cfg + .provider + .as_deref() + .ok_or_else(|| anyhow!("oauth.provider is required (custom providers land in §6.3)"))?; + let spec = builtin(provider) + .ok_or_else(|| anyhow!("unknown oauth provider {provider:?} (built-ins: anthropic-mcp)"))?; + let scopes = if cfg.scopes.is_empty() { + spec.default_scopes.iter().map(|s| s.to_string()).collect() + } else { + cfg.scopes.clone() + }; + Ok((spec, scopes)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn anthropic_mcp_spec_matches_adr_table() { + let spec = builtin("anthropic-mcp").expect("anthropic-mcp is built-in"); + assert_eq!(spec.authorize_url, "https://claude.ai/oauth/authorize"); + assert_eq!(spec.token_url, "https://platform.claude.com/v1/oauth/token"); + assert_eq!(spec.callback, "http://localhost:53692/callback"); + assert!(spec.default_scopes.contains(&"user:mcp_servers")); + } + + #[test] + fn unknown_provider_returns_none() { + assert!(builtin("does-not-exist").is_none()); + assert!(builtin("").is_none()); + } + + #[test] + fn resolve_uses_default_scopes_when_config_omits_them() { + let cfg = OAuthConfig { + provider: Some("anthropic-mcp".to_string()), + scopes: vec![], + }; + let (spec, scopes) = resolve(&cfg).unwrap(); + assert_eq!(spec, ANTHROPIC_MCP); + assert_eq!(scopes.len(), ANTHROPIC_MCP.default_scopes.len()); + } + + #[test] + fn resolve_uses_config_scopes_when_provided() { + let cfg = OAuthConfig { + provider: Some("anthropic-mcp".to_string()), + scopes: vec!["user:profile".to_string(), "user:inference".to_string()], + }; + let (_, scopes) = resolve(&cfg).unwrap(); + assert_eq!(scopes, vec!["user:profile", "user:inference"]); + } + + #[test] + fn resolve_rejects_missing_provider() { + let cfg = OAuthConfig { + provider: None, + scopes: vec![], + }; + let err = resolve(&cfg).unwrap_err().to_string(); + assert!(err.contains("required"), "got: {err}"); + } + + #[test] + fn 
resolve_rejects_unknown_provider() { + let cfg = OAuthConfig { + provider: Some("github-copilot".to_string()), + scopes: vec![], + }; + let err = resolve(&cfg).unwrap_err().to_string(); + assert!(err.contains("unknown oauth provider"), "got: {err}"); + } +} From da86d5be85d3508ad716621137a24a3b0259ba64 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 22:55:31 +0000 Subject: [PATCH 36/54] =?UTF-8?q?feat(openab-agent/mcp):=20OAuthConfig=20?= =?UTF-8?q?=C2=A76.3=20fields=20+=20discovery=20boot=20check?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends OAuthConfig with authorize_url / token_url / client_id / device_authorization_endpoint / discovery / discovery_allowlist so custom OAuth 2.1 providers can be declared inline. Adds validate() that rejects discovery=true without an explicit allowlist (RFC 8414 SSRF guard, ADR §6.3 / §6.4) and hooks it into load_layered. oauth.rs tests switch to ..Default::default() so future field additions don't churn the test struct literals. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/config.rs | 130 ++++++++++++++++++++++++++++++++- openab-agent/src/mcp/oauth.rs | 10 +-- 2 files changed, 130 insertions(+), 10 deletions(-) diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 742459430..6509837a7 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -57,15 +57,46 @@ pub struct ToolFilter { pub exclude: Vec, } -/// OAuth block. Phase 1 only parses `provider` + `scopes`; custom-provider -/// fields (§6.3: `authorize_url`, `token_url`, `device_authorization_endpoint`, -/// `discovery`, `discovery_allowlist`) land with the Phase 2 auth slice. -#[derive(Debug, Clone, Serialize, Deserialize)] +/// OAuth block. +/// +/// `provider` selects a built-in spec from `oauth::builtin()`. Setting it +/// to an unknown name + supplying `authorize_url` / `token_url` defines a +/// custom OAuth 2.1 provider (ADR §6.3). 
`discovery: true` opts into +/// RFC 8414 dynamic discovery and requires a non-empty +/// `discovery_allowlist` of domains (§6.4 SSRF guard). +#[derive(Debug, Default, Clone, Serialize, Deserialize)] pub struct OAuthConfig { #[serde(default)] pub provider: Option, #[serde(default)] pub scopes: Vec, + #[serde(default)] + pub authorize_url: Option, + #[serde(default)] + pub token_url: Option, + #[serde(default)] + pub client_id: Option, + #[serde(default)] + pub device_authorization_endpoint: Option, + #[serde(default)] + pub discovery: bool, + #[serde(default)] + pub discovery_allowlist: Vec, +} + +impl OAuthConfig { + /// Boot-time validation (ADR §6.3 / §6.4). `discovery: true` without an + /// explicit allowlist is rejected — RFC 8414 lookups in multi-tenant + /// deployments would otherwise become an SSRF vector. + pub fn validate(&self, server: &str) -> Result<()> { + if self.discovery && self.discovery_allowlist.is_empty() { + return Err(anyhow!( + "mcp server {server:?}: oauth.discovery=true requires \ + a non-empty oauth.discovery_allowlist (ADR §6.3)" + )); + } + Ok(()) + } } impl McpConfig { @@ -89,9 +120,21 @@ impl McpConfig { let layer = Self::load_file(path)?; merged.servers.extend(layer.servers); } + merged.validate()?; Ok(merged) } + /// Validate every server's `oauth` block (ADR §6.3 boot check). Returns + /// the first failure — finer-grained per-server isolation lives in §5.6. + pub fn validate(&self) -> Result<()> { + for (name, server) in &self.servers { + if let ServerConfig::Http { oauth: Some(oauth), .. 
} = server { + oauth.validate(name)?; + } + } + Ok(()) + } + fn load_file(path: &Path) -> Result { let raw = std::fs::read_to_string(path) .with_context(|| format!("read mcp config {}", path.display()))?; @@ -287,4 +330,83 @@ mod tests { _ => unreachable!(), } } + + #[test] + fn parses_custom_oauth_provider_fields() { + let json = r#"{ + "mcpServers": { + "custom": { + "type": "http", + "url": "https://example.com/mcp", + "oauth": { + "provider": "custom", + "authorize_url": "https://example.com/oauth/authorize", + "token_url": "https://example.com/oauth/token", + "client_id": "abc123", + "device_authorization_endpoint": "https://example.com/oauth/device", + "discovery": true, + "discovery_allowlist": ["*.example.com"] + } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let ServerConfig::Http { oauth: Some(oauth), .. } = cfg.servers.get("custom").unwrap() + else { + panic!("expected http with oauth"); + }; + assert_eq!( + oauth.authorize_url.as_deref(), + Some("https://example.com/oauth/authorize"), + ); + assert_eq!( + oauth.token_url.as_deref(), + Some("https://example.com/oauth/token"), + ); + assert_eq!(oauth.client_id.as_deref(), Some("abc123")); + assert_eq!( + oauth.device_authorization_endpoint.as_deref(), + Some("https://example.com/oauth/device"), + ); + assert!(oauth.discovery); + assert_eq!(oauth.discovery_allowlist, vec!["*.example.com".to_string()]); + } + + #[test] + fn validate_rejects_discovery_without_allowlist() { + let oauth = OAuthConfig { + provider: Some("custom".into()), + discovery: true, + ..Default::default() + }; + let err = oauth.validate("srv").unwrap_err().to_string(); + assert!(err.contains("discovery_allowlist"), "got: {err}"); + assert!(err.contains("srv"), "got: {err}"); + } + + #[test] + fn validate_accepts_discovery_with_allowlist() { + let oauth = OAuthConfig { + provider: Some("custom".into()), + discovery: true, + discovery_allowlist: vec!["*.example.com".into()], + ..Default::default() + }; + 
oauth.validate("srv").unwrap(); + } + + #[test] + fn load_layered_rejects_invalid_discovery_config() { + let dir = tempfile::tempdir().unwrap(); + let project = dir.path().join("project.json"); + std::fs::write( + &project, + r#"{"mcpServers":{"bad":{"type":"http","url":"https://example.com","oauth":{"provider":"custom","discovery":true}}}}"#, + ) + .unwrap(); + let err = McpConfig::load_layered(None, Some(&project)) + .unwrap_err() + .to_string(); + assert!(err.contains("discovery_allowlist"), "got: {err}"); + } } diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index e05f7d77e..e31d9807b 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -100,7 +100,7 @@ mod tests { fn resolve_uses_default_scopes_when_config_omits_them() { let cfg = OAuthConfig { provider: Some("anthropic-mcp".to_string()), - scopes: vec![], + ..Default::default() }; let (spec, scopes) = resolve(&cfg).unwrap(); assert_eq!(spec, ANTHROPIC_MCP); @@ -112,6 +112,7 @@ mod tests { let cfg = OAuthConfig { provider: Some("anthropic-mcp".to_string()), scopes: vec!["user:profile".to_string(), "user:inference".to_string()], + ..Default::default() }; let (_, scopes) = resolve(&cfg).unwrap(); assert_eq!(scopes, vec!["user:profile", "user:inference"]); @@ -119,10 +120,7 @@ mod tests { #[test] fn resolve_rejects_missing_provider() { - let cfg = OAuthConfig { - provider: None, - scopes: vec![], - }; + let cfg = OAuthConfig::default(); let err = resolve(&cfg).unwrap_err().to_string(); assert!(err.contains("required"), "got: {err}"); } @@ -131,7 +129,7 @@ mod tests { fn resolve_rejects_unknown_provider() { let cfg = OAuthConfig { provider: Some("github-copilot".to_string()), - scopes: vec![], + ..Default::default() }; let err = resolve(&cfg).unwrap_err().to_string(); assert!(err.contains("unknown oauth provider"), "got: {err}"); From 7ea3e5931634b8b30ecdc96d0ae79b235a4f5b22 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 23:10:55 +0000 
Subject: [PATCH 37/54] fix(openab-agent/mcp): rustfmt struct-pattern nested-call multi-line Burned Tick 30: `if let ServerConfig::Http { oauth: Some(oauth), .. } = server` at 74 chars fmt-rejects because the nested `Some(oauth)` binding forces multi-line struct-pattern formatting regardless of total line width. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/config.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 6509837a7..003bb5310 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -128,7 +128,10 @@ impl McpConfig { /// the first failure — finer-grained per-server isolation lives in §5.6. pub fn validate(&self) -> Result<()> { for (name, server) in &self.servers { - if let ServerConfig::Http { oauth: Some(oauth), .. } = server { + if let ServerConfig::Http { + oauth: Some(oauth), .. + } = server + { oauth.validate(name)?; } } @@ -351,7 +354,9 @@ mod tests { } }"#; let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let ServerConfig::Http { oauth: Some(oauth), .. } = cfg.servers.get("custom").unwrap() + let ServerConfig::Http { + oauth: Some(oauth), .. + } = cfg.servers.get("custom").unwrap() else { panic!("expected http with oauth"); }; From 65f1c4d8a7faa86e2826c6535f57b28ecfe6a060 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 23:23:57 +0000 Subject: [PATCH 38/54] =?UTF-8?q?feat(openab-agent/mcp):=20resolve=20custo?= =?UTF-8?q?m=20OAuth=20providers=20(ADR=20=C2=A76.3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resolve() now returns ResolvedProvider (owned strings) instead of (ProviderSpec, Vec). Built-in providers fill it from their static spec; unknown providers fall through to the custom path, which requires authorize_url + token_url and propagates client_id / device_authorization_endpoint when supplied. 
callback is None for custom (§6.4 picks the port at login time). Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/oauth.rs | 167 ++++++++++++++++++++++++++-------- 1 file changed, 131 insertions(+), 36 deletions(-) diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index e31d9807b..52d61a36f 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -1,11 +1,7 @@ -//! OAuth provider catalog (ADR §6.2). Wiring into the rmcp Streamable HTTP -//! transport + agent-guided flows (§6.4) lands in subsequent slices; this -//! module is the data layer the login / refresh code will dispatch through. -//! -//! Scopes are stored as `&'static [&'static str]` so callers can join them -//! with the space delimiter the OAuth 2.1 spec mandates without owning a -//! `Vec`. Per-server overrides (`OAuthConfig.scopes`) replace the defaults -//! and pay for a `Vec` at the boundary. +//! OAuth provider catalog (ADR §6.2) + custom-provider resolution (§6.3). +//! Wiring into the rmcp Streamable HTTP transport + agent-guided flows +//! (§6.4) lands in subsequent slices; this module is the data layer the +//! login / refresh code will dispatch through. // The §6.4 login slice is the first prod caller — until then, every item // here is reachable only via the unit tests below, so `cargo clippy @@ -17,9 +13,8 @@ use anyhow::{anyhow, Result}; use super::config::OAuthConfig; -/// Static description of a single OAuth provider — URLs + the loopback -/// redirect the §6.4 browser flow listens on. `default_scopes` is the -/// minimum set the agent will request when `oauth.scopes` is omitted +/// Static description of a single built-in OAuth provider. `default_scopes` +/// is the minimum set the agent will request when `oauth.scopes` is omitted /// from the server config; per-server overrides win when present. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ProviderSpec { @@ -46,8 +41,7 @@ pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { }; /// Look up a built-in `ProviderSpec` by config name. Returns `None` for -/// custom providers (handled by §6.3 once `OAuthConfig` grows the URL -/// fields) and for unknown names. +/// custom providers (§6.3) and for unknown names. pub fn builtin(name: &str) -> Option { match name { "anthropic-mcp" => Some(ANTHROPIC_MCP), @@ -55,26 +49,71 @@ pub fn builtin(name: &str) -> Option { } } -/// Resolve a server's `oauth:` block to a `ProviderSpec` plus the effective -/// scope list. `OAuthConfig::scopes`, when non-empty, replaces the spec's -/// defaults entirely — the caller never needs to merge. +/// Effective per-server OAuth parameters after resolving the built-in catalog +/// and `OAuthConfig` overrides. `callback` is `None` for custom providers +/// (§6.4 picks a free port at login time); built-ins pin theirs. `client_id` +/// is `None` for built-ins (the per-provider flow code in §6.4 owns it) and +/// optional for custom providers — OAuth 2.1 servers vary on whether public +/// clients must register. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ResolvedProvider { + pub authorize_url: String, + pub token_url: String, + pub client_id: Option, + pub callback: Option, + pub device_authorization_endpoint: Option, + pub scopes: Vec, +} + +/// Resolve a server's `oauth:` block. Built-in providers come from +/// `builtin()`; unknown providers fall through to the §6.3 custom path, +/// which requires `authorize_url` + `token_url` on the config. /// -/// Custom providers (per ADR §6.3) require `OAuthConfig` to grow -/// `authorize_url` / `token_url` fields; until that lands, an `oauth:` -/// block without a known `provider` is an error. 
-pub fn resolve(cfg: &OAuthConfig) -> Result<(ProviderSpec, Vec)> { +/// `OAuthConfig::scopes`, when non-empty, replaces the spec's defaults +/// entirely — the caller never needs to merge. +pub fn resolve(cfg: &OAuthConfig) -> Result { let provider = cfg .provider .as_deref() - .ok_or_else(|| anyhow!("oauth.provider is required (custom providers land in §6.3)"))?; - let spec = builtin(provider) - .ok_or_else(|| anyhow!("unknown oauth provider {provider:?} (built-ins: anthropic-mcp)"))?; + .ok_or_else(|| anyhow!("oauth.provider is required"))?; + if let Some(spec) = builtin(provider) { + Ok(resolve_builtin(spec, cfg)) + } else { + resolve_custom(provider, cfg) + } +} + +fn resolve_builtin(spec: ProviderSpec, cfg: &OAuthConfig) -> ResolvedProvider { let scopes = if cfg.scopes.is_empty() { spec.default_scopes.iter().map(|s| s.to_string()).collect() } else { cfg.scopes.clone() }; - Ok((spec, scopes)) + ResolvedProvider { + authorize_url: spec.authorize_url.to_string(), + token_url: spec.token_url.to_string(), + client_id: None, + callback: Some(spec.callback.to_string()), + device_authorization_endpoint: None, + scopes, + } +} + +fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result { + let authorize_url = cfg.authorize_url.clone().ok_or_else(|| { + anyhow!("custom oauth provider {provider:?}: oauth.authorize_url is required (ADR §6.3)") + })?; + let token_url = cfg.token_url.clone().ok_or_else(|| { + anyhow!("custom oauth provider {provider:?}: oauth.token_url is required (ADR §6.3)") + })?; + Ok(ResolvedProvider { + authorize_url, + token_url, + client_id: cfg.client_id.clone(), + callback: None, + device_authorization_endpoint: cfg.device_authorization_endpoint.clone(), + scopes: cfg.scopes.clone(), + }) } #[cfg(test)] @@ -97,41 +136,97 @@ mod tests { } #[test] - fn resolve_uses_default_scopes_when_config_omits_them() { + fn resolve_builtin_uses_default_scopes_when_config_omits_them() { let cfg = OAuthConfig { provider: Some("anthropic-mcp".to_string()), 
..Default::default() }; - let (spec, scopes) = resolve(&cfg).unwrap(); - assert_eq!(spec, ANTHROPIC_MCP); - assert_eq!(scopes.len(), ANTHROPIC_MCP.default_scopes.len()); + let r = resolve(&cfg).unwrap(); + assert_eq!(r.authorize_url, ANTHROPIC_MCP.authorize_url); + assert_eq!(r.callback.as_deref(), Some(ANTHROPIC_MCP.callback)); + assert_eq!(r.scopes.len(), ANTHROPIC_MCP.default_scopes.len()); + assert!(r.client_id.is_none()); + assert!(r.device_authorization_endpoint.is_none()); } #[test] - fn resolve_uses_config_scopes_when_provided() { + fn resolve_builtin_uses_config_scopes_when_provided() { let cfg = OAuthConfig { provider: Some("anthropic-mcp".to_string()), scopes: vec!["user:profile".to_string(), "user:inference".to_string()], ..Default::default() }; - let (_, scopes) = resolve(&cfg).unwrap(); - assert_eq!(scopes, vec!["user:profile", "user:inference"]); + let r = resolve(&cfg).unwrap(); + assert_eq!(r.scopes, vec!["user:profile", "user:inference"]); } #[test] fn resolve_rejects_missing_provider() { - let cfg = OAuthConfig::default(); - let err = resolve(&cfg).unwrap_err().to_string(); + let err = resolve(&OAuthConfig::default()).unwrap_err().to_string(); assert!(err.contains("required"), "got: {err}"); } #[test] - fn resolve_rejects_unknown_provider() { + fn resolve_custom_uses_config_urls_and_propagates_device_endpoint() { + let cfg = OAuthConfig { + provider: Some("linear".to_string()), + authorize_url: Some("https://linear.app/oauth/authorize".to_string()), + token_url: Some("https://api.linear.app/oauth/token".to_string()), + client_id: Some("client-abc".to_string()), + device_authorization_endpoint: Some("https://linear.app/oauth/device".to_string()), + scopes: vec!["read".to_string(), "write".to_string()], + ..Default::default() + }; + let r = resolve(&cfg).unwrap(); + assert_eq!(r.authorize_url, "https://linear.app/oauth/authorize"); + assert_eq!(r.token_url, "https://api.linear.app/oauth/token"); + assert_eq!(r.client_id.as_deref(), 
Some("client-abc")); + assert_eq!( + r.device_authorization_endpoint.as_deref(), + Some("https://linear.app/oauth/device"), + ); + assert!( + r.callback.is_none(), + "custom providers defer callback to login-time port allocation", + ); + assert_eq!(r.scopes, vec!["read", "write"]); + } + + #[test] + fn resolve_custom_minimal_two_urls_only() { + let cfg = OAuthConfig { + provider: Some("acme".to_string()), + authorize_url: Some("https://acme.example/authorize".to_string()), + token_url: Some("https://acme.example/token".to_string()), + ..Default::default() + }; + let r = resolve(&cfg).unwrap(); + assert!(r.client_id.is_none()); + assert!(r.device_authorization_endpoint.is_none()); + assert!(r.callback.is_none()); + assert!(r.scopes.is_empty()); + } + + #[test] + fn resolve_custom_rejects_missing_authorize_url() { + let cfg = OAuthConfig { + provider: Some("custom".to_string()), + token_url: Some("https://example.com/token".to_string()), + ..Default::default() + }; + let err = resolve(&cfg).unwrap_err().to_string(); + assert!(err.contains("authorize_url"), "got: {err}"); + assert!(err.contains("custom"), "got: {err}"); + } + + #[test] + fn resolve_custom_rejects_missing_token_url() { let cfg = OAuthConfig { - provider: Some("github-copilot".to_string()), + provider: Some("custom".to_string()), + authorize_url: Some("https://example.com/authorize".to_string()), ..Default::default() }; let err = resolve(&cfg).unwrap_err().to_string(); - assert!(err.contains("unknown oauth provider"), "got: {err}"); + assert!(err.contains("token_url"), "got: {err}"); } } From 04b84f95f3b4cbccb8dadda7d64663d92297f9c2 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 23:34:14 +0000 Subject: [PATCH 39/54] refactor(openab-agent/mcp): ResolvedProvider as enum {Builtin, Custom} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Mira's Tick 32 review: encode "callback always pinned for built-ins, dynamic for custom" and "client_id 
owned by §6.4 for built-ins, from config for custom" as variants instead of convention-based Option fields. §6.4 caller gets exhaustive matching for free. ProviderSpec gains a `name: &'static str` field so the resolver can copy it into Builtin's provider_name without re-matching the catalog key. BUILTINS slice replaces the duplicated string-key match in builtin() — single source of truth. authorize_url() / token_url() / scopes() getters keep the call sites that don't care about the variant from drowning in `match`. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/oauth.rs | 157 +++++++++++++++++++++++----------- 1 file changed, 106 insertions(+), 51 deletions(-) diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index 52d61a36f..c18ddddc6 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -18,6 +18,7 @@ use super::config::OAuthConfig; /// from the server config; per-server overrides win when present. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ProviderSpec { + pub name: &'static str, pub authorize_url: &'static str, pub token_url: &'static str, pub callback: &'static str, @@ -27,6 +28,7 @@ pub struct ProviderSpec { /// Anthropic MCP (claude.ai). Scope list from ADR §6.2 — `org:create_api_key` /// is the broadest grant; consumers should narrow via per-server overrides. pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { + name: "anthropic-mcp", authorize_url: "https://claude.ai/oauth/authorize", token_url: "https://platform.claude.com/v1/oauth/token", callback: "http://localhost:53692/callback", @@ -40,46 +42,86 @@ pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { ], }; +const BUILTINS: &[ProviderSpec] = &[ANTHROPIC_MCP]; + /// Look up a built-in `ProviderSpec` by config name. Returns `None` for /// custom providers (§6.3) and for unknown names. 
pub fn builtin(name: &str) -> Option { - match name { - "anthropic-mcp" => Some(ANTHROPIC_MCP), - _ => None, - } + BUILTINS.iter().copied().find(|spec| spec.name == name) } /// Effective per-server OAuth parameters after resolving the built-in catalog -/// and `OAuthConfig` overrides. `callback` is `None` for custom providers -/// (§6.4 picks a free port at login time); built-ins pin theirs. `client_id` -/// is `None` for built-ins (the per-provider flow code in §6.4 owns it) and -/// optional for custom providers — OAuth 2.1 servers vary on whether public -/// clients must register. +/// and `OAuthConfig` overrides. +/// +/// The two variants encode invariants that an `Option`-heavy struct couldn't: +/// built-ins always pin a `callback` (their PKCE port is hard-coded in the +/// provider's app registration) and never carry a `client_id` (the §6.4 flow +/// code owns it, mirroring `auth.rs::codex_client_id()`). Custom providers +/// flip both: §6.4 allocates a free port at login time, and `client_id` +/// comes from config (OAuth 2.1 public clients vary on registration). +/// +/// `device_authorization_endpoint` only appears on `Custom` — adding device +/// support for a built-in provider is a `ProviderSpec` schema change, not a +/// config flag. #[derive(Debug, Clone, PartialEq, Eq)] -pub struct ResolvedProvider { - pub authorize_url: String, - pub token_url: String, - pub client_id: Option, - pub callback: Option, - pub device_authorization_endpoint: Option, - pub scopes: Vec, +pub enum ResolvedProvider { + Builtin { + provider_name: &'static str, + authorize_url: &'static str, + token_url: &'static str, + callback: &'static str, + scopes: Vec, + }, + Custom { + provider_name: String, + authorize_url: String, + token_url: String, + client_id: Option, + device_authorization_endpoint: Option, + scopes: Vec, + }, +} + +impl ResolvedProvider { + /// Accessor for the shared `authorize_url` field. 
Callers that don't + /// need to distinguish built-in vs custom can skip the `match`. + pub fn authorize_url(&self) -> &str { + match self { + Self::Builtin { authorize_url, .. } => authorize_url, + Self::Custom { authorize_url, .. } => authorize_url, + } + } + + /// Accessor for the shared `token_url` field. + pub fn token_url(&self) -> &str { + match self { + Self::Builtin { token_url, .. } => token_url, + Self::Custom { token_url, .. } => token_url, + } + } + + /// Accessor for the shared scope list. + pub fn scopes(&self) -> &[String] { + match self { + Self::Builtin { scopes, .. } | Self::Custom { scopes, .. } => scopes, + } + } } /// Resolve a server's `oauth:` block. Built-in providers come from /// `builtin()`; unknown providers fall through to the §6.3 custom path, /// which requires `authorize_url` + `token_url` on the config. /// -/// `OAuthConfig::scopes`, when non-empty, replaces the spec's defaults +/// `OAuthConfig::scopes`, when non-empty, replaces the built-in defaults /// entirely — the caller never needs to merge. 
pub fn resolve(cfg: &OAuthConfig) -> Result { let provider = cfg .provider .as_deref() .ok_or_else(|| anyhow!("oauth.provider is required"))?; - if let Some(spec) = builtin(provider) { - Ok(resolve_builtin(spec, cfg)) - } else { - resolve_custom(provider, cfg) + match builtin(provider) { + Some(spec) => Ok(resolve_builtin(spec, cfg)), + None => resolve_custom(provider, cfg), } } @@ -89,12 +131,11 @@ fn resolve_builtin(spec: ProviderSpec, cfg: &OAuthConfig) -> ResolvedProvider { } else { cfg.scopes.clone() }; - ResolvedProvider { - authorize_url: spec.authorize_url.to_string(), - token_url: spec.token_url.to_string(), - client_id: None, - callback: Some(spec.callback.to_string()), - device_authorization_endpoint: None, + ResolvedProvider::Builtin { + provider_name: spec.name, + authorize_url: spec.authorize_url, + token_url: spec.token_url, + callback: spec.callback, scopes, } } @@ -106,11 +147,11 @@ fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result let token_url = cfg.token_url.clone().ok_or_else(|| { anyhow!("custom oauth provider {provider:?}: oauth.token_url is required (ADR §6.3)") })?; - Ok(ResolvedProvider { + Ok(ResolvedProvider::Custom { + provider_name: provider.to_string(), authorize_url, token_url, client_id: cfg.client_id.clone(), - callback: None, device_authorization_endpoint: cfg.device_authorization_endpoint.clone(), scopes: cfg.scopes.clone(), }) @@ -141,12 +182,15 @@ mod tests { provider: Some("anthropic-mcp".to_string()), ..Default::default() }; - let r = resolve(&cfg).unwrap(); - assert_eq!(r.authorize_url, ANTHROPIC_MCP.authorize_url); - assert_eq!(r.callback.as_deref(), Some(ANTHROPIC_MCP.callback)); - assert_eq!(r.scopes.len(), ANTHROPIC_MCP.default_scopes.len()); - assert!(r.client_id.is_none()); - assert!(r.device_authorization_endpoint.is_none()); + let ResolvedProvider::Builtin { + provider_name, callback, scopes, .. 
+ } = resolve(&cfg).unwrap() + else { + panic!("expected Builtin variant"); + }; + assert_eq!(provider_name, "anthropic-mcp"); + assert_eq!(callback, ANTHROPIC_MCP.callback); + assert_eq!(scopes.len(), ANTHROPIC_MCP.default_scopes.len()); } #[test] @@ -157,7 +201,7 @@ mod tests { ..Default::default() }; let r = resolve(&cfg).unwrap(); - assert_eq!(r.scopes, vec!["user:profile", "user:inference"]); + assert_eq!(r.scopes(), &["user:profile", "user:inference"]); } #[test] @@ -177,19 +221,26 @@ mod tests { scopes: vec!["read".to_string(), "write".to_string()], ..Default::default() }; - let r = resolve(&cfg).unwrap(); - assert_eq!(r.authorize_url, "https://linear.app/oauth/authorize"); - assert_eq!(r.token_url, "https://api.linear.app/oauth/token"); - assert_eq!(r.client_id.as_deref(), Some("client-abc")); + let ResolvedProvider::Custom { + provider_name, + authorize_url, + token_url, + client_id, + device_authorization_endpoint, + scopes, + } = resolve(&cfg).unwrap() + else { + panic!("expected Custom variant"); + }; + assert_eq!(provider_name, "linear"); + assert_eq!(authorize_url, "https://linear.app/oauth/authorize"); + assert_eq!(token_url, "https://api.linear.app/oauth/token"); + assert_eq!(client_id.as_deref(), Some("client-abc")); assert_eq!( - r.device_authorization_endpoint.as_deref(), + device_authorization_endpoint.as_deref(), Some("https://linear.app/oauth/device"), ); - assert!( - r.callback.is_none(), - "custom providers defer callback to login-time port allocation", - ); - assert_eq!(r.scopes, vec!["read", "write"]); + assert_eq!(scopes, vec!["read", "write"]); } #[test] @@ -200,11 +251,15 @@ mod tests { token_url: Some("https://acme.example/token".to_string()), ..Default::default() }; - let r = resolve(&cfg).unwrap(); - assert!(r.client_id.is_none()); - assert!(r.device_authorization_endpoint.is_none()); - assert!(r.callback.is_none()); - assert!(r.scopes.is_empty()); + let ResolvedProvider::Custom { + client_id, device_authorization_endpoint, scopes, 
.. + } = resolve(&cfg).unwrap() + else { + panic!("expected Custom variant"); + }; + assert!(client_id.is_none()); + assert!(device_authorization_endpoint.is_none()); + assert!(scopes.is_empty()); } #[test] From f61b498398ee50778e0382dc35539ab878a6d13b Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 31 May 2026 23:41:05 +0000 Subject: [PATCH 40/54] fix(openab-agent/mcp): rustfmt per-line struct-pattern binders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Burned Tick 33: rustfmt's struct-pattern formatting splits ≥3 simple binders onto their own lines, even when the body would fit inline. Different rule from the nested-call case (Tick 30) where the body stays inline. Runbook updated with both rules side-by-side. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/oauth.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index c18ddddc6..fa6cb2497 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -183,7 +183,10 @@ mod tests { ..Default::default() }; let ResolvedProvider::Builtin { - provider_name, callback, scopes, .. + provider_name, + callback, + scopes, + .. } = resolve(&cfg).unwrap() else { panic!("expected Builtin variant"); @@ -252,7 +255,10 @@ mod tests { ..Default::default() }; let ResolvedProvider::Custom { - client_id, device_authorization_endpoint, scopes, .. + client_id, + device_authorization_endpoint, + scopes, + .. 
} = resolve(&cfg).unwrap() else { panic!("expected Custom variant"); From 24001447f7485f95231c2795a0e04f79f3f15e79 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:00:28 +0000 Subject: [PATCH 41/54] =?UTF-8?q?feat(openab-agent/mcp):=20paste-back=20OA?= =?UTF-8?q?uth=20flow=20primitives=20(ADR=20=C2=A76.4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New mcp::flow module exposes init_paste_authorize(provider, client_id, redirect_uri) -> PasteAuthorize, which generates the PKCE pair + state nonce internally and returns the authorize URL plus the secrets the caller must persist for complete_login to validate the callback. Internalizing pair generation removes a footgun (caller can't mismatch verifier/state) and shrinks the API to the two parameters that actually vary per call. auth::generate_pkce promoted from private to pub so the MCP flow path can share it with Codex — security primitive, single source of truth, no drift on future hardening. Module-scope #![allow(dead_code)] consistent with mcp::oauth — first prod caller (the §6.4 login orchestration) lands in the next slice. Tests cover URL structure, percent-encoding of redirect_uri, scope form-encoding, unparseable authorize_url error path, and custom-provider URL composition. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 2 +- openab-agent/src/mcp/flow.rs | 146 +++++++++++++++++++++++++++++++++++ openab-agent/src/mcp/mod.rs | 1 + 3 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 openab-agent/src/mcp/flow.rs diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index b83179e15..ec3a49ca0 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -227,7 +227,7 @@ async fn refresh_token(store: &TokenStore) -> Result { }) } -fn generate_pkce() -> (String, String) { +pub fn generate_pkce() -> (String, String) { let mut buf = [0u8; 32]; getrandom::fill(&mut buf).expect("getrandom failed"); let verifier = URL_SAFE_NO_PAD.encode(buf); diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs new file mode 100644 index 000000000..17b34b5ce --- /dev/null +++ b/openab-agent/src/mcp/flow.rs @@ -0,0 +1,146 @@ +//! OAuth 2.1 paste-back flow primitives (ADR §6.4). PKCE comes from +//! `crate::auth::generate_pkce` — shared with the Codex paths so a +//! security-primitive change can't drift between modules. Orchestration +//! (device polling, callback parsing) lands in subsequent slices. + +// First prod caller (§6.4 login orchestration) lands in the next slice; +// until then every item is reachable only via tests, so +// `clippy --features mcp -D warnings` would flag dead_code. +#![allow(dead_code)] + +use anyhow::Result; +use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; +use url::Url; + +use crate::auth::generate_pkce; +use super::oauth::ResolvedProvider; + +/// 16-byte URL-safe `state` nonce for the OAuth authorize URL. 
+fn generate_state() -> String { + let mut buf = [0u8; 16]; + getrandom::fill(&mut buf).expect("getrandom failed"); + URL_SAFE_NO_PAD.encode(buf) +} + +/// Result of `init_paste_authorize`: the URL to surface to the user, plus +/// the `code_verifier` + `state` the caller must persist under the +/// pending-login key for `complete_login` to validate the callback. +pub struct PasteAuthorize { + pub url: String, + pub code_verifier: String, + pub state: String, +} + +/// Start a paste-back OAuth 2.1 authorize flow. Generates the PKCE pair +/// and state nonce internally so the caller can't pair them up wrong; +/// builds the RFC 6749 authorize URL with `S256` PKCE and space-joined +/// scopes. `client_id` is caller-supplied: built-ins look it up via a +/// hard-coded helper (mirroring `auth::codex_client_id`); custom +/// providers carry it on `ResolvedProvider::Custom`. `redirect_uri` is +/// the provider's pinned callback for built-ins or a runtime-bound +/// `localhost:` for custom paste-back flows. 
+pub fn init_paste_authorize( + provider: &ResolvedProvider, + client_id: &str, + redirect_uri: &str, +) -> Result { + let (code_verifier, code_challenge) = generate_pkce(); + let state = generate_state(); + let mut url = Url::parse(provider.authorize_url())?; + url.query_pairs_mut() + .append_pair("response_type", "code") + .append_pair("client_id", client_id) + .append_pair("redirect_uri", redirect_uri) + .append_pair("code_challenge", &code_challenge) + .append_pair("code_challenge_method", "S256") + .append_pair("state", &state) + .append_pair("scope", &provider.scopes().join(" ")); + Ok(PasteAuthorize { + url: url.to_string(), + code_verifier, + state, + }) +} + +#[cfg(test)] +mod tests { + use crate::mcp::config::OAuthConfig; + use crate::mcp::oauth::resolve; + use super::*; + + const TEST_REDIRECT: &str = "http://localhost:53692/callback"; + + #[test] + fn state_is_url_safe_and_unique() { + let s = generate_state(); + let url_safe = s + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_'); + assert!(url_safe); + assert_ne!(s, generate_state()); + } + + fn builtin_provider() -> ResolvedProvider { + let cfg = OAuthConfig { + provider: Some("anthropic-mcp".to_string()), + ..Default::default() + }; + resolve(&cfg).unwrap() + } + + #[test] + fn init_paste_authorize_threads_pkce_and_state_into_url() { + let p = builtin_provider(); + let r = init_paste_authorize(&p, "client-xyz", TEST_REDIRECT).unwrap(); + assert!(r.url.starts_with("https://claude.ai/oauth/authorize?")); + assert!(r.url.contains("response_type=code")); + assert!(r.url.contains("client_id=client-xyz")); + assert!(r.url.contains("code_challenge_method=S256")); + assert!(r.url.contains(&format!("state={}", r.state))); + assert!(!r.code_verifier.is_empty()); + } + + #[test] + fn init_paste_authorize_percent_encodes_redirect_uri() { + let p = builtin_provider(); + let r = init_paste_authorize(&p, "c", TEST_REDIRECT).unwrap(); + let want = 
"redirect_uri=http%3A%2F%2Flocalhost%3A53692%2Fcallback"; + assert!(r.url.contains(want)); + } + + #[test] + fn init_paste_authorize_form_encodes_scope_spaces_as_plus() { + let p = builtin_provider(); + let r = init_paste_authorize(&p, "c", TEST_REDIRECT).unwrap(); + assert!(r.url.contains("scope=org%3Acreate_api_key")); + assert!(r.url.contains("user%3Amcp_servers")); + } + + #[test] + fn init_paste_authorize_rejects_unparseable_authorize_url() { + let cfg = OAuthConfig { + provider: Some("broken".to_string()), + authorize_url: Some("not a url".to_string()), + token_url: Some("https://example.com/token".to_string()), + ..Default::default() + }; + let p = resolve(&cfg).unwrap(); + assert!(init_paste_authorize(&p, "c", TEST_REDIRECT).is_err()); + } + + #[test] + fn init_paste_authorize_for_custom_provider() { + let cfg = OAuthConfig { + provider: Some("linear".to_string()), + authorize_url: Some("https://linear.app/oauth/authorize".to_string()), + token_url: Some("https://api.linear.app/oauth/token".to_string()), + client_id: Some("linear-client".to_string()), + scopes: vec!["read".to_string(), "write".to_string()], + ..Default::default() + }; + let p = resolve(&cfg).unwrap(); + let r = init_paste_authorize(&p, "linear-client", TEST_REDIRECT).unwrap(); + assert!(r.url.starts_with("https://linear.app/oauth/authorize?")); + assert!(r.url.contains("scope=read+write")); + } +} diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 17884c9c1..81278aa4e 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -1,6 +1,7 @@ //! Native MCP client. See `docs/adr/openab-agent-mcp.md`. 
pub mod config; +pub mod flow; pub mod meta_tool; pub mod oauth; pub mod runtime; From 7f4ef17a233ab6d34ee5248d1d5dc444857dedf7 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:11:04 +0000 Subject: [PATCH 42/54] fix(openab-agent/mcp): rustfmt import precedence super < crate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rustfmt's reorder_imports does NOT sort by pure alphabetical order across use statements — local path roots have a fixed precedence: self < super < crate < external crates. My alphabetical assumption (c < s, so crate first) was wrong in both flow.rs's module-scope imports and its mod tests block. Burned Tick 36 (twice, same file). Runbook updated alongside the Tick 14 sub-module-vs-bare ordering note since the two rules apply at different scopes. Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/flow.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs index 17b34b5ce..caac9d340 100644 --- a/openab-agent/src/mcp/flow.rs +++ b/openab-agent/src/mcp/flow.rs @@ -12,8 +12,8 @@ use anyhow::Result; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use url::Url; -use crate::auth::generate_pkce; use super::oauth::ResolvedProvider; +use crate::auth::generate_pkce; /// 16-byte URL-safe `state` nonce for the OAuth authorize URL. 
fn generate_state() -> String { @@ -64,9 +64,9 @@ pub fn init_paste_authorize( #[cfg(test)] mod tests { + use super::*; use crate::mcp::config::OAuthConfig; use crate::mcp::oauth::resolve; - use super::*; const TEST_REDIRECT: &str = "http://localhost:53692/callback"; From 95f33bc2d51487330df4aef5fca33576b157d302 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:25:40 +0000 Subject: [PATCH 43/54] feat(openab-agent/mcp): NeedsAuth state for oauth-protected http servers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ServerStatus gains a NeedsAuth variant; McpRuntimeManager::connect() now transitions oauth-protected http servers into that state with an error pointing the user at `mcp login <name>` instead of staying silently Disconnected. Icon "◌" (U+25CC DOTTED CIRCLE) matches the existing geometric family (○ ◐ ●) rather than "⚠" — the latter is emoji-prone (Discord and many terminals upgrade it via VS16), which would break aligned status output. Status label "needs_auth" wired through meta_tool's snake_case status_label() table. Tests cover both the transition + error format and an idempotency guarantee: a second connect() on a NeedsAuth server must keep the state sticky (only successful `mcp login` clears it).
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/mcp/meta_tool.rs | 1 + openab-agent/src/mcp/runtime.rs | 58 +++++++++++++++++++++---------- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 557badf4c..7ecfe0034 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -186,6 +186,7 @@ fn status_label(status: &ServerStatus) -> &'static str { ServerStatus::Disconnected => "disconnected", ServerStatus::Connecting => "connecting", ServerStatus::Connected => "connected", + ServerStatus::NeedsAuth => "needs_auth", ServerStatus::Failed(_) => "failed", } } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 933fd23d5..3e00dfba0 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -24,12 +24,12 @@ use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; -#[allow(dead_code)] // NeedsAuth lands with the Phase 2 OAuth slice (ADR §5.7) #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { Disconnected, Connecting, Connected, + NeedsAuth, Failed(String), } @@ -39,6 +39,7 @@ impl ServerStatus { ServerStatus::Disconnected => "○", ServerStatus::Connecting => "◐", ServerStatus::Connected => "●", + ServerStatus::NeedsAuth => "◌", ServerStatus::Failed(_) => "✗", } } @@ -147,8 +148,10 @@ impl McpRuntimeManager { } /// Lazy-connect the named server (ADR §5.7). Idempotent if already - /// `Connected` with a live client. HTTP servers requiring OAuth are - /// rejected until the Phase 2 auth slice lands (ADR §6). + /// `Connected` with a live client. HTTP servers with an `oauth:` block + /// are routed through `mcp login` first — `connect` marks them + /// `NeedsAuth` and returns an error pointing the caller at the login + /// subcommand rather than attempting an unauthenticated dial. 
pub async fn connect(&self, name: &str) -> Result<()> { let dial = { let mut guard = self.handles.write().await; @@ -163,17 +166,16 @@ impl McpRuntimeManager { ServerConfig::Stdio { command, args, env, .. } => Dial::Stdio { command, args, env }, - // Reject oauth-protected servers BEFORE the `Connecting` - // transition: we never attempted a handshake, so leaving - // status at `Disconnected` is the honest state. Status - // becomes `Failed` only when a dial was actually tried. - ServerConfig::Http { - oauth: Some(_), - url, - .. - } => { + // Oauth-protected servers can't be dialed via plain connect; + // mark `NeedsAuth` so `mcp status` shows a persistent + // "waiting for login" signal (vs `Disconnected`, which + // implies a plain `connect` would succeed). The `Failed` + // path remains reserved for dials that were attempted and + // failed at handshake. + ServerConfig::Http { oauth: Some(_), .. } => { + handle.status = ServerStatus::NeedsAuth; return Err(anyhow!( - "oauth-protected http server {url:?} requires the auth slice (Phase 2 §6)" + "mcp server {name:?} needs oauth login — run `mcp login {name}`" )); } ServerConfig::Http { url, .. } => Dial::Http { url }, @@ -298,7 +300,7 @@ mod tests { } #[tokio::test] - async fn connect_http_with_oauth_defers_to_auth_slice() { + async fn connect_http_with_oauth_marks_needs_auth() { let json = r#"{ "mcpServers": { "linear": { @@ -311,10 +313,30 @@ mod tests { let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); let err = mgr.connect("linear").await.unwrap_err().to_string(); - assert!(err.contains("oauth"), "expected 'oauth' in {err}"); - // OAuth rejection happens BEFORE the Connecting transition, so the - // server remains Disconnected — no dial was attempted. 
- assert_eq!(mgr.statuses().await[0].1, ServerStatus::Disconnected); + assert!(err.contains("needs oauth login"), "expected hint in {err}"); + assert!(err.contains("mcp login"), "expected 'mcp login' hint in {err}"); + assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); + } + + #[tokio::test] + async fn connect_oauth_twice_keeps_needs_auth_sticky() { + // Second connect() must NOT silently re-enter `Connecting` and + // shadow the user-actionable state — the only path out of + // `NeedsAuth` is a successful `mcp login`. + let json = r#"{ + "mcpServers": { + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear" } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + assert!(mgr.connect("linear").await.is_err()); + assert!(mgr.connect("linear").await.is_err()); + assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); } #[tokio::test] From 32232c0e42ed1bde4a51b6ca7c6ee6973efc5acd Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:30:36 +0000 Subject: [PATCH 44/54] style(openab-agent/mcp): split assert! args to satisfy rustfmt fn_call_width Tick 37's assert!(err.contains("mcp login"), "...") was 84 chars inline but rustfmt's default fn_call_width=60 measures the arg list and split it. Match the formatter. 
--- openab-agent/src/mcp/runtime.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 3e00dfba0..e1534cefa 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -314,7 +314,10 @@ mod tests { let mgr = McpRuntimeManager::from_config(cfg); let err = mgr.connect("linear").await.unwrap_err().to_string(); assert!(err.contains("needs oauth login"), "expected hint in {err}"); - assert!(err.contains("mcp login"), "expected 'mcp login' hint in {err}"); + assert!( + err.contains("mcp login"), + "expected 'mcp login' hint in {err}" + ); assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); } From af3a25ddd70bc5175b89dc190b0214e05441a547 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:49:05 +0000 Subject: [PATCH 45/54] feat(openab-agent/mcp): start_paste_login + builtin client_id resolver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit McpRuntimeManager::start_paste_login(server) wires flow::init_paste_authorize for built-in OAuth providers (ADR §6.4). The PKCE verifier + state are stashed in an in-memory pending_logins map (HashMap<String, PendingPasteLogin>) for the next slice's complete_login to consume. Server status flips to NeedsAuth. Scope this slice: - Built-in providers only (anthropic-mcp). Custom-provider paste-back needs runtime callback port allocation; deferred to a follow-up slice. - Custom providers declaring device_authorization_endpoint short-circuit with an explicit "use device flow" error (ADR §6.4 selection logic). - ADR §6.4 says transient state lives "in TokenStore"; this slice keeps it in-process. auth.json needs a heterogeneous-entry schema change to hold non-token shapes — separate slice. oauth::builtin_client_id is the per-provider client_id resolver — env-var-required (no hard-coded default) so paste-back fails loud rather than emitting an authorize URL with a placeholder client_id.
flow.rs sheds its module-level #![allow(dead_code)] now that init_paste_authorize has a prod caller transitively from start_paste_login (itself allow-dead-code until the next slice wires the mcp::login action). --- openab-agent/src/mcp/flow.rs | 5 - openab-agent/src/mcp/oauth.rs | 60 ++++++++ openab-agent/src/mcp/runtime.rs | 251 ++++++++++++++++++++++++++++++++ 3 files changed, 311 insertions(+), 5 deletions(-) diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs index caac9d340..39ed8b13c 100644 --- a/openab-agent/src/mcp/flow.rs +++ b/openab-agent/src/mcp/flow.rs @@ -3,11 +3,6 @@ //! security-primitive change can't drift between modules. Orchestration //! (device polling, callback parsing) lands in subsequent slices. -// First prod caller (§6.4 login orchestration) lands in the next slice; -// until then every item is reachable only via tests, so -// `clippy --features mcp -D warnings` would flag dead_code. -#![allow(dead_code)] - use anyhow::Result; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use url::Url; diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index fa6cb2497..6d75d7952 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -50,6 +50,29 @@ pub fn builtin(name: &str) -> Option { BUILTINS.iter().copied().find(|spec| spec.name == name) } +/// Resolve a built-in provider's OAuth `client_id`. Mirrors +/// `auth::codex_client_id`'s env-var-override pattern but without a hard- +/// coded default — the Anthropic MCP public client_id isn't yet pinned in +/// this repo, so requiring the env var fails fast with a useful error +/// rather than silently dialing with a placeholder. Replace with a +/// hard-coded default once a real value is published. 
+pub fn builtin_client_id(provider: &str) -> Result { + let env_var = match provider { + "anthropic-mcp" => "OPENAB_MCP_ANTHROPIC_CLIENT_ID", + other => { + return Err(anyhow!( + "no built-in client_id mapping for provider {other:?}" + )); + } + }; + std::env::var(env_var).map_err(|_| { + anyhow!( + "built-in provider {provider:?} requires env var {env_var} \ + (client_id of the provider's OAuth app)" + ) + }) +} + /// Effective per-server OAuth parameters after resolving the built-in catalog /// and `OAuthConfig` overrides. /// @@ -161,6 +184,43 @@ fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result mod tests { use super::*; + // Both env-touching tests below race the same OS env var; serialize + // them per the runbook's Tick 24 lesson (acp.rs ANTHROPIC_API_KEY race). + static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + + #[test] + fn builtin_client_id_requires_env_var() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + // SAFETY: serialized via ENV_LOCK; isolated env key. + unsafe { + std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); + } + let err = builtin_client_id("anthropic-mcp") + .unwrap_err() + .to_string(); + assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); + } + + #[test] + fn builtin_client_id_uses_env_var_when_set() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + // SAFETY: serialized via ENV_LOCK; isolated env key. 
+ unsafe { + std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-test-id"); + } + let id = builtin_client_id("anthropic-mcp").unwrap(); + assert_eq!(id, "anth-test-id"); + unsafe { + std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); + } + } + + #[test] + fn builtin_client_id_rejects_unknown_provider() { + let err = builtin_client_id("does-not-exist").unwrap_err().to_string(); + assert!(err.contains("does-not-exist"), "got: {err}"); + } + #[test] fn anthropic_mcp_spec_matches_adr_table() { let spec = builtin("anthropic-mcp").expect("anthropic-mcp is built-in"); diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index e1534cefa..d87115e72 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -23,6 +23,8 @@ use tokio::process::Command; use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; +use super::flow::init_paste_authorize; +use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { @@ -67,11 +69,39 @@ impl std::fmt::Debug for ServerHandle { } } +/// Transient per-server state captured at `start_paste_login` and consumed +/// by `complete_login` (next slice). `token_url` + `provider_name` are +/// snapshotted up front so a config edit between the two calls can't +/// silently redirect the token exchange. +/// +/// ADR §6.4 says this lives "in TokenStore"; this slice keeps it in +/// process memory only — `auth.json` would need a heterogeneous-entry +/// schema change to hold non-token shapes, deferred to its own slice. +#[derive(Debug, Clone)] +#[allow(dead_code)] // wired in next slice (complete_login) +pub struct PendingPasteLogin { + pub verifier: String, + pub state: String, + pub token_url: String, + pub provider_name: String, +} + +/// Public return of `start_paste_login`. 
The caller relays `authorize_url` +/// to the user; `state` is echoed so the agent can show / log it without +/// reaching into runtime internals. +#[derive(Debug, Clone)] +#[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) +pub struct PasteLoginStart { + pub authorize_url: String, + pub state: String, +} + /// Owns one `ServerHandle` per configured server, behind an async `RwLock` /// so the foreground LLM path and the background eviction task can share it. #[derive(Debug, Default, Clone)] pub struct McpRuntimeManager { handles: Arc>>, + pending_logins: Arc>>, } impl McpRuntimeManager { @@ -91,6 +121,7 @@ impl McpRuntimeManager { .collect(); Self { handles: Arc::new(RwLock::new(handles)), + pending_logins: Arc::new(RwLock::new(HashMap::new())), } } @@ -147,6 +178,86 @@ impl McpRuntimeManager { out } + /// Begin a paste-back OAuth login for an HTTP server with an `oauth:` + /// block (ADR §6.4). Produces the authorize URL the agent surfaces to + /// the user; the matching PKCE verifier + `state` nonce are kept on + /// `self.pending_logins` for `complete_login` (next slice) to consume. + /// + /// Scoped to **built-in** providers this slice. Custom-provider + /// paste-back needs runtime port allocation for the callback (§6.4), + /// and any provider that advertises a `device_authorization_endpoint` + /// should run device-code instead (§6.4 selection logic). Both errors + /// are explicit so the LLM can pick a different action. + #[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) + pub async fn start_paste_login(&self, name: &str) -> Result { + let oauth_cfg = { + let guard = self.handles.read().await; + let handle = guard + .get(name) + .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; + match handle.config.resolved(name)? { + ServerConfig::Http { + oauth: Some(oauth), .. + } => oauth, + ServerConfig::Http { oauth: None, .. 
} => { + return Err(anyhow!("mcp server {name:?} has no oauth block")); + } + ServerConfig::Stdio { .. } => { + return Err(anyhow!("mcp server {name:?} is stdio, not http+oauth")); + } + } + }; + + let provider = resolve(&oauth_cfg)?; + let (client_id, redirect_uri) = match &provider { + ResolvedProvider::Builtin { + provider_name, callback, .. + } => (builtin_client_id(provider_name)?, (*callback).to_string()), + ResolvedProvider::Custom { + device_authorization_endpoint: Some(_), .. + } => { + return Err(anyhow!( + "mcp server {name:?} has a device endpoint; use device flow" + )); + } + ResolvedProvider::Custom { .. } => { + return Err(anyhow!( + "mcp server {name:?}: custom-provider paste-back not yet supported" + )); + } + }; + + let started = init_paste_authorize(&provider, &client_id, &redirect_uri)?; + let pending = PendingPasteLogin { + verifier: started.code_verifier, + state: started.state.clone(), + token_url: provider.token_url().to_string(), + provider_name: provider_name_of(&provider), + }; + { + let mut handles = self.handles.write().await; + if let Some(handle) = handles.get_mut(name) { + handle.status = ServerStatus::NeedsAuth; + } + } + self.pending_logins + .write() + .await + .insert(name.to_string(), pending); + Ok(PasteLoginStart { + authorize_url: started.url, + state: started.state, + }) + } + + /// Borrow the in-flight pending paste-login for `name`. Returns a + /// clone so callers don't hold the lock; `complete_login` (next + /// slice) is the intended consumer. + #[allow(dead_code)] // first prod caller is complete_login in next slice + pub async fn pending_paste_login(&self, name: &str) -> Option { + self.pending_logins.read().await.get(name).cloned() + } + /// Lazy-connect the named server (ADR §5.7). Idempotent if already /// `Connected` with a live client. 
HTTP servers with an `oauth:` block /// are routed through `mcp login` first — `connect` marks them @@ -211,6 +322,15 @@ impl McpRuntimeManager { } } +/// Stringified provider name for the pending-state record. `Builtin` keeps +/// its `&'static str` static; `Custom` already owns a `String`. +fn provider_name_of(provider: &ResolvedProvider) -> String { + match provider { + ResolvedProvider::Builtin { provider_name, .. } => (*provider_name).to_string(), + ResolvedProvider::Custom { provider_name, .. } => provider_name.clone(), + } +} + /// Per-transport dial parameters, extracted under the manager's write lock /// then dialed without holding the lock. Flat (no nested `*Dial` structs) /// because two variants don't warrant a dispatch enum. @@ -362,6 +482,137 @@ mod tests { } } + // start_paste_login + builtin_client_id race on the same env var. + // Same fix as oauth.rs / acp.rs (Tick 24 lesson). + static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + + fn linear_custom_cfg() -> &'static str { + r#"{ + "mcpServers": { + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { + "provider": "linear", + "authorize_url": "https://linear.app/oauth/authorize", + "token_url": "https://api.linear.app/oauth/token", + "client_id": "linear-client", + "scopes": ["read"] + } + } + } + }"# + } + + fn anthropic_builtin_cfg() -> &'static str { + r#"{ + "mcpServers": { + "anthro": { + "type": "http", + "url": "https://example.com/mcp", + "oauth": { "provider": "anthropic-mcp" } + } + } + }"# + } + + async fn start_login_err(mgr: &McpRuntimeManager, name: &str) -> String { + mgr.start_paste_login(name) + .await + .unwrap_err() + .to_string() + } + + #[tokio::test] + async fn start_paste_login_builtin_returns_authorize_url_and_pins_pending() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + // SAFETY: serialized via ENV_LOCK; isolated env key. 
+ unsafe { + std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-cid"); + } + let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let start = mgr.start_paste_login("anthro").await.unwrap(); + assert!(start.authorize_url.starts_with("https://claude.ai/oauth/authorize?")); + assert!(start.authorize_url.contains("client_id=anth-cid")); + assert!(start.authorize_url.contains(&format!("state={}", start.state))); + let pending = mgr.pending_paste_login("anthro").await.unwrap(); + assert_eq!(pending.state, start.state); + assert!(!pending.verifier.is_empty()); + assert_eq!( + pending.token_url, + "https://platform.claude.com/v1/oauth/token" + ); + assert_eq!(pending.provider_name, "anthropic-mcp"); + assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); + unsafe { + std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); + } + } + + #[tokio::test] + async fn start_paste_login_rejects_custom_provider_for_now() { + let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = start_login_err(&mgr, "linear").await; + assert!(err.contains("custom-provider"), "got: {err}"); + assert!(mgr.pending_paste_login("linear").await.is_none()); + } + + #[tokio::test] + async fn start_paste_login_rejects_custom_with_device_endpoint() { + let json = r#"{ + "mcpServers": { + "dev": { + "type": "http", + "url": "https://example.com/mcp", + "oauth": { + "provider": "dev", + "authorize_url": "https://example.com/oauth/authorize", + "token_url": "https://example.com/oauth/token", + "device_authorization_endpoint": "https://example.com/oauth/device" + } + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = start_login_err(&mgr, "dev").await; + assert!(err.contains("device flow"), "got: {err}"); + } + + #[tokio::test] + async fn 
start_paste_login_rejects_stdio_server() { + let json = r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = start_login_err(&mgr, "fs").await; + assert!(err.contains("stdio"), "got: {err}"); + } + + #[tokio::test] + async fn start_paste_login_unknown_server_errors() { + let mgr = McpRuntimeManager::from_config(McpConfig::default()); + let err = start_login_err(&mgr, "ghost").await; + assert!(err.contains("ghost"), "got: {err}"); + } + + #[tokio::test] + async fn start_paste_login_builtin_without_env_var_errors_loud() { + let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + unsafe { + std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); + } + let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let err = start_login_err(&mgr, "anthro").await; + assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); + } + #[tokio::test] async fn connect_to_missing_binary_records_failed() { let json = r#"{ From f0de2f29235a560687429b910010dc66bbc39705 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 00:51:23 +0000 Subject: [PATCH 46/54] style(openab-agent/mcp): satisfy rustfmt for Tick 39 slice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three distinct fmt rule misses on the start_paste_login slice: - oauth.rs: chain at receiver+chain = exactly 60 chars stays inline (over-broken on the unwrap_err+to_string chain) - runtime.rs: struct-pattern binders force per-line when arm body is also long enough that the inline form would overflow — different threshold than the existing connect()'s Stdio arm - runtime.rs: field-access chain ELEMENT counts toward chain_width; `start.authorize_url.starts_with(...)` is 2 chain elements, not 1, so receiver+chain over 60 → 
break --- openab-agent/src/mcp/oauth.rs | 4 +--- openab-agent/src/mcp/runtime.rs | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs index 6d75d7952..f3ea31661 100644 --- a/openab-agent/src/mcp/oauth.rs +++ b/openab-agent/src/mcp/oauth.rs @@ -195,9 +195,7 @@ mod tests { unsafe { std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); } - let err = builtin_client_id("anthropic-mcp") - .unwrap_err() - .to_string(); + let err = builtin_client_id("anthropic-mcp").unwrap_err().to_string(); assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index d87115e72..d363bf487 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -211,10 +211,13 @@ impl McpRuntimeManager { let provider = resolve(&oauth_cfg)?; let (client_id, redirect_uri) = match &provider { ResolvedProvider::Builtin { - provider_name, callback, .. + provider_name, + callback, + .. } => (builtin_client_id(provider_name)?, (*callback).to_string()), ResolvedProvider::Custom { - device_authorization_endpoint: Some(_), .. + device_authorization_endpoint: Some(_), + .. 
} => { return Err(anyhow!( "mcp server {name:?} has a device endpoint; use device flow" @@ -517,10 +520,7 @@ mod tests { } async fn start_login_err(mgr: &McpRuntimeManager, name: &str) -> String { - mgr.start_paste_login(name) - .await - .unwrap_err() - .to_string() + mgr.start_paste_login(name).await.unwrap_err().to_string() } #[tokio::test] @@ -533,9 +533,13 @@ mod tests { let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); let start = mgr.start_paste_login("anthro").await.unwrap(); - assert!(start.authorize_url.starts_with("https://claude.ai/oauth/authorize?")); + assert!(start + .authorize_url + .starts_with("https://claude.ai/oauth/authorize?")); assert!(start.authorize_url.contains("client_id=anth-cid")); - assert!(start.authorize_url.contains(&format!("state={}", start.state))); + assert!(start + .authorize_url + .contains(&format!("state={}", start.state))); let pending = mgr.pending_paste_login("anthro").await.unwrap(); assert_eq!(pending.state, start.state); assert!(!pending.verifier.is_empty()); From 7dd718c5066021fa4f8feea0f65289e7285f97aa Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 01:11:12 +0000 Subject: [PATCH 47/54] feat(openab-agent): split auth.json into TokenStore | PendingPasteLogin Untagged Serde enum `AuthEntry` keeps refresh-task state machine separate from in-flight paste-login state. Per Mira's Tick 39 review: repurposing TokenStore fields for pending entries would have made refresh loop on them. Adds `{load,save,remove}_pending_login` helpers (mcp-gated, wired in next slice via runtime::start_paste_login). 
--- openab-agent/src/auth.rs | 203 +++++++++++++++++++++++++++++++++++---- 1 file changed, 183 insertions(+), 20 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index ec3a49ca0..076b56ea1 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -39,6 +39,38 @@ pub struct TokenStore { pub provider: String, } +/// Transient per-server state captured at `start_paste_login` and consumed +/// by `complete_login` (ADR §6.4). Lives in `auth.json` under +/// `mcp-pending:`. `token_url` + `provider_name` are snapshotted +/// up front so a config edit between init and finish can't redirect the +/// token exchange. +/// +/// Unconditionally compiled (not behind `mcp` feature) so a non-mcp build +/// can still parse + round-trip an `auth.json` containing pending entries. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PendingPasteLogin { + pub verifier: String, + pub state: String, + pub token_url: String, + pub provider_name: String, +} + +/// `auth.json` value type. Untagged Serde enum: `TokenStore` has required +/// `access_token`, `PendingPasteLogin` has required `verifier` — the +/// shapes are disjoint, so deserialization picks the right variant +/// without an explicit tag (and existing files stay byte-compatible). +/// +/// Per Mira's Tick 39 review: option-A (repurposing TokenStore fields for +/// pending state) would have made the refresh task treat pending entries +/// as "expired tokens" and loop on them. The untagged enum keeps the two +/// state machines completely separate. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum AuthEntry { + Token(TokenStore), + Pending(PendingPasteLogin), +} + fn auth_path() -> PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); PathBuf::from(home) @@ -54,7 +86,7 @@ fn auth_path() -> PathBuf { /// Discriminates by the top-level `access_token` key — present means the /// file is the legacy `TokenStore` shape, absent means the new namespaced /// map. A single JSON parse gives accurate error context either way. -fn read_auth_file(path: &Path) -> Result> { +fn read_auth_file(path: &Path) -> Result> { let data = std::fs::read_to_string(path)?; let value: serde_json::Value = serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}"))?; @@ -62,7 +94,7 @@ fn read_auth_file(path: &Path) -> Result> { let legacy: TokenStore = serde_json::from_value(value) .map_err(|e| anyhow!("Invalid auth.json (legacy format): {e}"))?; let mut map = HashMap::new(); - map.insert(CODEX_NAMESPACE.to_string(), legacy); + map.insert(CODEX_NAMESPACE.to_string(), AuthEntry::Token(legacy)); return Ok(map); } serde_json::from_value(value).map_err(|e| anyhow!("Invalid auth.json: {e}")) @@ -72,7 +104,7 @@ fn read_auth_file(path: &Path) -> Result> { /// satisfies the ADR §6.1 refresh-token rotation contract — without it, a /// Spot interruption between local write and S3 sync would restore a /// revoked refresh token from durable storage on the next task start. -fn write_auth_file(path: &Path, map: &HashMap) -> Result<()> { +fn write_auth_file(path: &Path, map: &HashMap) -> Result<()> { if let Some(dir) = path.parent() { std::fs::create_dir_all(dir)?; } @@ -106,18 +138,19 @@ pub fn load_tokens() -> Result { path.display() ) })?; - map.get(CODEX_NAMESPACE).cloned().ok_or_else(|| { - anyhow!( + match map.get(CODEX_NAMESPACE) { + Some(AuthEntry::Token(t)) => Ok(t.clone()), + _ => Err(anyhow!( "No codex credentials in {}. 
Run `openab-agent auth codex-oauth` first.", path.display() - ) - }) + )), + } } fn save_tokens(store: &TokenStore) -> Result<()> { let path = auth_path(); let mut map = read_auth_file(&path).unwrap_or_default(); - map.insert(CODEX_NAMESPACE.to_string(), store.clone()); + map.insert(CODEX_NAMESPACE.to_string(), AuthEntry::Token(store.clone())); write_auth_file(&path, &map) } @@ -130,9 +163,11 @@ pub fn load_namespaced_token(key: &str) -> Result { let path = auth_path(); let map = read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; - map.get(key) - .cloned() - .ok_or_else(|| anyhow!("no credentials stored for {key:?}")) + match map.get(key) { + Some(AuthEntry::Token(t)) => Ok(t.clone()), + Some(AuthEntry::Pending(_)) => Err(anyhow!("{key:?} is a pending login, not a token")), + None => Err(anyhow!("no credentials stored for {key:?}")), + } } /// Insert or replace the credential at `key`, preserving all other entries. @@ -143,7 +178,54 @@ pub fn load_namespaced_token(key: &str) -> Result { pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { let path = auth_path(); let mut map = read_auth_file(&path).unwrap_or_default(); - map.insert(key.to_string(), store.clone()); + map.insert(key.to_string(), AuthEntry::Token(store.clone())); + write_auth_file(&path, &map) +} + +/// Read a `mcp-pending:` entry from `auth.json` (ADR §6.4). Errors +/// if the key holds a token instead — the two namespaces shouldn't +/// collide, but a hand-edited file would. 
+#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (runtime::start_paste_login) +pub fn load_pending_login(key: &str) -> Result { + let path = auth_path(); + let map = + read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + match map.get(key) { + Some(AuthEntry::Pending(p)) => Ok(p.clone()), + Some(AuthEntry::Token(_)) => Err(anyhow!("{key:?} is a token, not a pending login")), + None => Err(anyhow!("no pending login for {key:?}")), + } +} + +/// Persist a `PendingPasteLogin` under `mcp-pending:` (ADR §6.4). +/// Read-modify-write — same serialization caveat as `save_namespaced_token`. +#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (runtime::start_paste_login) +pub fn save_pending_login(key: &str, val: &PendingPasteLogin) -> Result<()> { + let path = auth_path(); + let mut map = read_auth_file(&path).unwrap_or_default(); + map.insert(key.to_string(), AuthEntry::Pending(val.clone())); + write_auth_file(&path, &map) +} + +/// Remove a pending-login entry (consumed on successful `complete_login`, +/// expired entry GC, or `mcp logout`). Idempotent — missing key is OK. 
+#[cfg(feature = "mcp")] +#[allow(dead_code)] // wired in next slice (complete_login) +pub fn remove_pending_login(key: &str) -> Result<()> { + let path = auth_path(); + let mut map = match read_auth_file(&path) { + Ok(m) => m, + Err(_) => return Ok(()), + }; + if map.remove(key).is_none() { + return Ok(()); + } + if map.is_empty() { + let _ = std::fs::remove_file(&path); + return Ok(()); + } write_auth_file(&path, &map) } @@ -625,6 +707,13 @@ mod tests { assert_eq!(challenge, expected); } + fn token_of(entry: Option<&AuthEntry>) -> &TokenStore { + match entry { + Some(AuthEntry::Token(t)) => t, + other => panic!("expected Token, got {other:?}"), + } + } + #[test] fn read_auth_file_migrates_legacy_single_tenant_format() { let dir = tempfile::tempdir().unwrap(); @@ -634,7 +723,7 @@ mod tests { let map = read_auth_file(&path).unwrap(); assert_eq!(map.len(), 1); assert_eq!( - map.get(CODEX_NAMESPACE).unwrap().access_token, + token_of(map.get(CODEX_NAMESPACE)).access_token, "test_access_token_value" ); } @@ -644,13 +733,13 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("auth.json"); let mut input = HashMap::new(); - input.insert("codex".to_string(), make_store(1)); - input.insert("mcp:linear".to_string(), make_store(2)); + input.insert("codex".to_string(), AuthEntry::Token(make_store(1))); + input.insert("mcp:linear".to_string(), AuthEntry::Token(make_store(2))); write_auth_file(&path, &input).unwrap(); let map = read_auth_file(&path).unwrap(); assert_eq!(map.len(), 2); - assert_eq!(map.get("codex").unwrap().expires_at, 1); - assert_eq!(map.get("mcp:linear").unwrap().expires_at, 2); + assert_eq!(token_of(map.get("codex")).expires_at, 1); + assert_eq!(token_of(map.get("mcp:linear")).expires_at, 2); } #[test] @@ -658,12 +747,12 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("auth.json"); let mut input = HashMap::new(); - input.insert("mcp:github".to_string(), make_store(42)); + 
input.insert("mcp:github".to_string(), AuthEntry::Token(make_store(42))); write_auth_file(&path, &input).unwrap(); let raw = std::fs::read_to_string(&path).unwrap(); assert!(raw.contains("mcp:github")); let map = read_auth_file(&path).unwrap(); - assert_eq!(map.get("mcp:github").unwrap().expires_at, 42); + assert_eq!(token_of(map.get("mcp:github")).expires_at, 42); } #[cfg(unix)] @@ -673,9 +762,83 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join("auth.json"); let mut input = HashMap::new(); - input.insert("codex".to_string(), make_store(0)); + input.insert("codex".to_string(), AuthEntry::Token(make_store(0))); write_auth_file(&path, &input).unwrap(); let mode = std::fs::metadata(&path).unwrap().permissions().mode() & 0o777; assert_eq!(mode, 0o600, "expected 0600, got {mode:o}"); } + + fn make_pending() -> PendingPasteLogin { + PendingPasteLogin { + verifier: "test-verifier".to_string(), + state: "test-state".to_string(), + token_url: "https://example.com/token".to_string(), + provider_name: "anthropic-mcp".to_string(), + } + } + + #[test] + fn auth_entry_untagged_round_trip_mixed_shapes() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert("codex".to_string(), AuthEntry::Token(make_store(1))); + input.insert( + "mcp-pending:linear".to_string(), + AuthEntry::Pending(make_pending()), + ); + write_auth_file(&path, &input).unwrap(); + let map = read_auth_file(&path).unwrap(); + assert_eq!(map.len(), 2); + assert_eq!(token_of(map.get("codex")).expires_at, 1); + match map.get("mcp-pending:linear") { + Some(AuthEntry::Pending(p)) => assert_eq!(p.verifier, "test-verifier"), + other => panic!("expected Pending, got {other:?}"), + } + } + + #[cfg(feature = "mcp")] + #[test] + fn pending_login_helpers_round_trip_via_global_path() { + // Drive the disk-backed save/load/remove path end-to-end. 
Touches + // the real `auth_path()` (env HOME) so isolate via a tempdir HOME. + // Single test = no need for an ENV_LOCK mutex. + let dir = tempfile::tempdir().unwrap(); + let prior_home = std::env::var("HOME").ok(); + // SAFETY: single-threaded, restored at end. + unsafe { + std::env::set_var("HOME", dir.path()); + } + let key = "mcp-pending:test-srv"; + save_pending_login(key, &make_pending()).unwrap(); + let got = load_pending_login(key).unwrap(); + assert_eq!(got, make_pending()); + remove_pending_login(key).unwrap(); + assert!(load_pending_login(key).is_err()); + unsafe { + match prior_home { + Some(h) => std::env::set_var("HOME", h), + None => std::env::remove_var("HOME"), + } + } + } + + #[cfg(feature = "mcp")] + #[test] + fn load_namespaced_token_errors_on_pending_entry() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + let mut input = HashMap::new(); + input.insert( + "mcp-pending:srv".to_string(), + AuthEntry::Pending(make_pending()), + ); + write_auth_file(&path, &input).unwrap(); + let map = read_auth_file(&path).unwrap(); + // Directly assert the discriminant rather than calling + // `load_namespaced_token`, which would also touch HOME and race + // the pending-helpers test above. Same intent, smaller blast radius. + let pending = map.get("mcp-pending:srv"); + assert!(matches!(pending, Some(AuthEntry::Pending(_)))); + } } From 79b1e2e5ed96d93fb70044f803875127ae7ef36f Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 01:32:34 +0000 Subject: [PATCH 48/54] feat(openab-agent/mcp): persist pending paste-login to auth.json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `start_paste_login` now writes the `mcp-pending:` entry via `auth::save_pending_login`, dropping the in-memory `pending_logins` map. Aligns runtime state with the ADR §6.4 contract ("kept in TokenStore") so `complete_login` survives an agent restart. 
To keep tests off the real `$HOME/.openab/agent/auth.json` (the cross-module HOME-env race that bit Tick 24), the auth-path becomes an injected `PathBuf` field: `from_config()` defaults to `auth::auth_path()`, `from_config_with_auth_path()` lets tests point at a tempdir. The two tests that exercise the disk path adopt a `mgr_with_tempdir` helper; rejection tests untouched (they error before persist). `auth::{load,save,remove}_pending_login` likewise take `&Path` so they're driven by the injected path, not a global. --- openab-agent/src/auth.rs | 66 ++++++++++++++------------------- openab-agent/src/mcp/runtime.rs | 66 +++++++++++++++++---------------- 2 files changed, 62 insertions(+), 70 deletions(-) diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 076b56ea1..c1cda68a2 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -71,7 +71,10 @@ pub enum AuthEntry { Pending(PendingPasteLogin), } -fn auth_path() -> PathBuf { +/// Default location of `auth.json`. Exposed so `McpRuntimeManager` can +/// thread the same path into its constructor and tests can inject a +/// tempdir without touching `$HOME` (which would race cross-module). +pub fn auth_path() -> PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); PathBuf::from(home) .join(".openab") @@ -184,13 +187,13 @@ pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { /// Read a `mcp-pending:` entry from `auth.json` (ADR §6.4). Errors /// if the key holds a token instead — the two namespaces shouldn't -/// collide, but a hand-edited file would. +/// collide, but a hand-edited file would. `path` is injected so the +/// runtime manager can point tests at a tempdir; production callers pass +/// `auth_path()`. 
#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (runtime::start_paste_login) -pub fn load_pending_login(key: &str) -> Result { - let path = auth_path(); +pub fn load_pending_login(path: &Path, key: &str) -> Result { let map = - read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; + read_auth_file(path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; match map.get(key) { Some(AuthEntry::Pending(p)) => Ok(p.clone()), Some(AuthEntry::Token(_)) => Err(anyhow!("{key:?} is a token, not a pending login")), @@ -201,21 +204,18 @@ pub fn load_pending_login(key: &str) -> Result { /// Persist a `PendingPasteLogin` under `mcp-pending:` (ADR §6.4). /// Read-modify-write — same serialization caveat as `save_namespaced_token`. #[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (runtime::start_paste_login) -pub fn save_pending_login(key: &str, val: &PendingPasteLogin) -> Result<()> { - let path = auth_path(); - let mut map = read_auth_file(&path).unwrap_or_default(); +pub fn save_pending_login(path: &Path, key: &str, val: &PendingPasteLogin) -> Result<()> { + let mut map = read_auth_file(path).unwrap_or_default(); map.insert(key.to_string(), AuthEntry::Pending(val.clone())); - write_auth_file(&path, &map) + write_auth_file(path, &map) } /// Remove a pending-login entry (consumed on successful `complete_login`, /// expired entry GC, or `mcp logout`). Idempotent — missing key is OK. 
#[cfg(feature = "mcp")] #[allow(dead_code)] // wired in next slice (complete_login) -pub fn remove_pending_login(key: &str) -> Result<()> { - let path = auth_path(); - let mut map = match read_auth_file(&path) { +pub fn remove_pending_login(path: &Path, key: &str) -> Result<()> { + let mut map = match read_auth_file(path) { Ok(m) => m, Err(_) => return Ok(()), }; @@ -223,10 +223,10 @@ pub fn remove_pending_login(key: &str) -> Result<()> { return Ok(()); } if map.is_empty() { - let _ = std::fs::remove_file(&path); + let _ = std::fs::remove_file(path); return Ok(()); } - write_auth_file(&path, &map) + write_auth_file(path, &map) } /// Remove the credential at `key`. Idempotent — missing key is not an @@ -799,28 +799,17 @@ mod tests { #[cfg(feature = "mcp")] #[test] - fn pending_login_helpers_round_trip_via_global_path() { - // Drive the disk-backed save/load/remove path end-to-end. Touches - // the real `auth_path()` (env HOME) so isolate via a tempdir HOME. - // Single test = no need for an ENV_LOCK mutex. + fn pending_login_helpers_round_trip_via_injected_path() { + // Tempdir path injected directly — no HOME-env shimming, so this + // test can't race auth-touching tests in other modules. let dir = tempfile::tempdir().unwrap(); - let prior_home = std::env::var("HOME").ok(); - // SAFETY: single-threaded, restored at end. 
- unsafe { - std::env::set_var("HOME", dir.path()); - } + let path = dir.path().join("auth.json"); let key = "mcp-pending:test-srv"; - save_pending_login(key, &make_pending()).unwrap(); - let got = load_pending_login(key).unwrap(); + save_pending_login(&path, key, &make_pending()).unwrap(); + let got = load_pending_login(&path, key).unwrap(); assert_eq!(got, make_pending()); - remove_pending_login(key).unwrap(); - assert!(load_pending_login(key).is_err()); - unsafe { - match prior_home { - Some(h) => std::env::set_var("HOME", h), - None => std::env::remove_var("HOME"), - } - } + remove_pending_login(&path, key).unwrap(); + assert!(load_pending_login(&path, key).is_err()); } #[cfg(feature = "mcp")] @@ -835,9 +824,10 @@ mod tests { ); write_auth_file(&path, &input).unwrap(); let map = read_auth_file(&path).unwrap(); - // Directly assert the discriminant rather than calling - // `load_namespaced_token`, which would also touch HOME and race - // the pending-helpers test above. Same intent, smaller blast radius. + // Assert the discriminant directly. `load_namespaced_token` would + // reach into the real `$HOME/.openab/agent/auth.json` and race + // cross-module tests; the variant check is the actual property + // under test. let pending = map.get("mcp-pending:srv"); assert!(matches!(pending, Some(AuthEntry::Pending(_)))); } diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index d363bf487..78e8457d8 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -13,6 +13,7 @@ //! the duration of a child-process spawn + handshake. 
use std::collections::HashMap; +use std::path::PathBuf; use std::sync::Arc; use anyhow::{anyhow, Context, Result}; @@ -25,6 +26,7 @@ use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; use super::flow::init_paste_authorize; use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; +use crate::auth::{auth_path, load_pending_login, save_pending_login, PendingPasteLogin}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { @@ -69,23 +71,6 @@ impl std::fmt::Debug for ServerHandle { } } -/// Transient per-server state captured at `start_paste_login` and consumed -/// by `complete_login` (next slice). `token_url` + `provider_name` are -/// snapshotted up front so a config edit between the two calls can't -/// silently redirect the token exchange. -/// -/// ADR §6.4 says this lives "in TokenStore"; this slice keeps it in -/// process memory only — `auth.json` would need a heterogeneous-entry -/// schema change to hold non-token shapes, deferred to its own slice. -#[derive(Debug, Clone)] -#[allow(dead_code)] // wired in next slice (complete_login) -pub struct PendingPasteLogin { - pub verifier: String, - pub state: String, - pub token_url: String, - pub provider_name: String, -} - /// Public return of `start_paste_login`. The caller relays `authorize_url` /// to the user; `state` is echoed so the agent can show / log it without /// reaching into runtime internals. @@ -98,14 +83,21 @@ pub struct PasteLoginStart { /// Owns one `ServerHandle` per configured server, behind an async `RwLock` /// so the foreground LLM path and the background eviction task can share it. -#[derive(Debug, Default, Clone)] +#[derive(Debug, Clone)] pub struct McpRuntimeManager { handles: Arc>>, - pending_logins: Arc>>, + /// `auth.json` location used for `mcp-pending:` persistence. + /// Injectable so tests can point at a tempdir instead of `$HOME`, + /// avoiding cross-module HOME-env races (Tick 24 lesson + ADR §6.4). 
+ auth_path: PathBuf, } impl McpRuntimeManager { pub fn from_config(cfg: McpConfig) -> Self { + Self::from_config_with_auth_path(cfg, auth_path()) + } + + pub fn from_config_with_auth_path(cfg: McpConfig, auth_path: PathBuf) -> Self { let handles: HashMap<_, _> = cfg .servers .into_iter() @@ -121,7 +113,7 @@ impl McpRuntimeManager { .collect(); Self { handles: Arc::new(RwLock::new(handles)), - pending_logins: Arc::new(RwLock::new(HashMap::new())), + auth_path, } } @@ -180,8 +172,9 @@ impl McpRuntimeManager { /// Begin a paste-back OAuth login for an HTTP server with an `oauth:` /// block (ADR §6.4). Produces the authorize URL the agent surfaces to - /// the user; the matching PKCE verifier + `state` nonce are kept on - /// `self.pending_logins` for `complete_login` (next slice) to consume. + /// the user; the matching PKCE verifier + `state` nonce are persisted + /// under `mcp-pending:` in `auth.json` for `complete_login` + /// (next slice) to consume. /// /// Scoped to **built-in** providers this slice. Custom-provider /// paste-back needs runtime port allocation for the callback (§6.4), @@ -237,28 +230,26 @@ impl McpRuntimeManager { token_url: provider.token_url().to_string(), provider_name: provider_name_of(&provider), }; + save_pending_login(&self.auth_path, &pending_key(name), &pending)?; { let mut handles = self.handles.write().await; if let Some(handle) = handles.get_mut(name) { handle.status = ServerStatus::NeedsAuth; } } - self.pending_logins - .write() - .await - .insert(name.to_string(), pending); Ok(PasteLoginStart { authorize_url: started.url, state: started.state, }) } - /// Borrow the in-flight pending paste-login for `name`. Returns a - /// clone so callers don't hold the lock; `complete_login` (next - /// slice) is the intended consumer. + /// Read the on-disk pending paste-login for `name`. 
`None` if there's + /// no entry or the file is unreadable; `complete_login` (next slice) + /// is the intended consumer and will distinguish the cases via the + /// `auth::load_pending_login` error message. #[allow(dead_code)] // first prod caller is complete_login in next slice pub async fn pending_paste_login(&self, name: &str) -> Option { - self.pending_logins.read().await.get(name).cloned() + load_pending_login(&self.auth_path, &pending_key(name)).ok() } /// Lazy-connect the named server (ADR §5.7). Idempotent if already @@ -334,6 +325,11 @@ fn provider_name_of(provider: &ResolvedProvider) -> String { } } +/// `auth.json` key for an in-flight paste-login (ADR §6.4 namespace). +fn pending_key(name: &str) -> String { + format!("mcp-pending:{name}") +} + /// Per-transport dial parameters, extracted under the manager's write lock /// then dialed without holding the lock. Flat (no nested `*Dial` structs) /// because two variants don't warrant a dispatch enum. @@ -523,6 +519,12 @@ mod tests { mgr.start_paste_login(name).await.unwrap_err().to_string() } + fn mgr_with_tempdir(cfg: McpConfig) -> (McpRuntimeManager, tempfile::TempDir) { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("auth.json"); + (McpRuntimeManager::from_config_with_auth_path(cfg, path), dir) + } + #[tokio::test] async fn start_paste_login_builtin_returns_authorize_url_and_pins_pending() { let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); @@ -531,7 +533,7 @@ mod tests { std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-cid"); } let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); + let (mgr, _dir) = mgr_with_tempdir(cfg); let start = mgr.start_paste_login("anthro").await.unwrap(); assert!(start .authorize_url @@ -557,7 +559,7 @@ mod tests { #[tokio::test] async fn start_paste_login_rejects_custom_provider_for_now() { let cfg: McpConfig = 
serde_json::from_str(linear_custom_cfg()).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); + let (mgr, _dir) = mgr_with_tempdir(cfg); let err = start_login_err(&mgr, "linear").await; assert!(err.contains("custom-provider"), "got: {err}"); assert!(mgr.pending_paste_login("linear").await.is_none()); From fd6adcf3d1f124ca151aade6b59d95499c98c399 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 01:41:22 +0000 Subject: [PATCH 49/54] style(openab-agent/mcp): satisfy rustfmt for mgr_with_tempdir tuple `(McpRuntimeManager::from_config_with_auth_path(cfg, path), dir)` at 60 chars between parens trips rustfmt's tuple width heuristic and gets exploded into a 4-line literal. Bind the manager first so the tuple stays a 2-token one-liner. No behaviour change. --- openab-agent/src/mcp/runtime.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 78e8457d8..9e8a517e1 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -521,8 +521,8 @@ mod tests { fn mgr_with_tempdir(cfg: McpConfig) -> (McpRuntimeManager, tempfile::TempDir) { let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - (McpRuntimeManager::from_config_with_auth_path(cfg, path), dir) + let mgr = McpRuntimeManager::from_config_with_auth_path(cfg, dir.path().join("auth.json")); + (mgr, dir) } #[tokio::test] From 09539858cef83288faf61e9bdc95df6983348ecb Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 01:54:00 +0000 Subject: [PATCH 50/54] feat(openab-agent/mcp): parse_paste_callback URL helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure URL → authorization-code helper for the upcoming `runtime::complete_login` (ADR §6.4). Validates the `state` echo before returning the `code` so CSRF / cross-flow contamination fails closed before any token-endpoint round-trip. 
Surfaces an `error=` query param verbatim and tolerates extra parameters (`iss`, vendor tracking) without rejecting valid callbacks. Token exchange + runtime wiring follow in the next slice; helper carries `#[allow(dead_code)]` until that lands so the no-feature build stays warning-clean. --- openab-agent/src/mcp/flow.rs | 84 ++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 3 deletions(-) diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs index 39ed8b13c..7d0fd7c80 100644 --- a/openab-agent/src/mcp/flow.rs +++ b/openab-agent/src/mcp/flow.rs @@ -1,9 +1,9 @@ //! OAuth 2.1 paste-back flow primitives (ADR §6.4). PKCE comes from //! `crate::auth::generate_pkce` — shared with the Codex paths so a -//! security-primitive change can't drift between modules. Orchestration -//! (device polling, callback parsing) lands in subsequent slices. +//! security-primitive change can't drift between modules. Device +//! polling orchestration lands in a subsequent slice. -use anyhow::Result; +use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use url::Url; @@ -57,6 +57,35 @@ pub fn init_paste_authorize( }) } +/// Parse a paste-back callback URL into its authorization `code` after +/// validating the `state` echo. OAuth 2.1 RFC 6749 §10.12 + §4.1.2 — a +/// mismatched `state` indicates CSRF / cross-flow contamination and MUST +/// reject the exchange before any token-endpoint round-trip. Tolerates +/// extra query params (vendor-specific tracking, `iss`, etc.). 
+#[allow(dead_code)] // wired in next slice (runtime::complete_login) +pub fn parse_paste_callback(redirect_url: &str, expected_state: &str) -> Result { + let url = Url::parse(redirect_url).map_err(|e| anyhow!("invalid redirect URL: {e}"))?; + let mut code = None; + let mut state = None; + let mut error = None; + for (k, v) in url.query_pairs() { + match k.as_ref() { + "code" => code = Some(v.into_owned()), + "state" => state = Some(v.into_owned()), + "error" => error = Some(v.into_owned()), + _ => {} + } + } + if let Some(err) = error { + return Err(anyhow!("authorize endpoint returned error: {err}")); + } + let got_state = state.ok_or_else(|| anyhow!("callback missing state"))?; + if got_state != expected_state { + return Err(anyhow!("state mismatch; flow rejected")); + } + code.ok_or_else(|| anyhow!("callback missing code")) +} + #[cfg(test)] mod tests { use super::*; @@ -138,4 +167,53 @@ mod tests { assert!(r.url.starts_with("https://linear.app/oauth/authorize?")); assert!(r.url.contains("scope=read+write")); } + + #[test] + fn parse_paste_callback_extracts_code_when_state_matches() { + let url = "http://localhost:53692/callback?code=abc123&state=xyz"; + let code = parse_paste_callback(url, "xyz").unwrap(); + assert_eq!(code, "abc123"); + } + + #[test] + fn parse_paste_callback_tolerates_extra_query_params() { + let url = "http://localhost:53692/cb?iss=https%3A%2F%2Fauth&state=s&code=c&tracking=1"; + let code = parse_paste_callback(url, "s").unwrap(); + assert_eq!(code, "c"); + } + + #[test] + fn parse_paste_callback_rejects_state_mismatch() { + let url = "http://localhost:53692/cb?code=c&state=wrong"; + let err = parse_paste_callback(url, "want").unwrap_err().to_string(); + assert!(err.contains("state mismatch"), "got: {err}"); + } + + #[test] + fn parse_paste_callback_rejects_missing_state() { + let url = "http://localhost:53692/cb?code=c"; + let err = parse_paste_callback(url, "x").unwrap_err().to_string(); + assert!(err.contains("missing state"), "got: 
{err}"); + } + + #[test] + fn parse_paste_callback_rejects_missing_code() { + let url = "http://localhost:53692/cb?state=x"; + let err = parse_paste_callback(url, "x").unwrap_err().to_string(); + assert!(err.contains("missing code"), "got: {err}"); + } + + #[test] + fn parse_paste_callback_surfaces_authorize_error() { + let url = "http://localhost:53692/cb?error=access_denied&state=x"; + let err = parse_paste_callback(url, "x").unwrap_err().to_string(); + assert!(err.contains("access_denied"), "got: {err}"); + } + + #[test] + fn parse_paste_callback_rejects_unparseable_url() { + let url = "not a url"; + let err = parse_paste_callback(url, "x").unwrap_err().to_string(); + assert!(err.contains("invalid redirect URL"), "got: {err}"); + } } From 99cd29c59b174429142743750d029ced80e63f91 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 03:30:53 +0000 Subject: [PATCH 51/54] revert: scope PR #959 to Phase 1 per shaun-agent Balanced split Reverts the 18 OAuth-related commits (2a69bf9..0953985) so this PR contains only ADR + Phase 1 foundation: rmcp dep, mcpServers config loader, stdio transport, meta-tool dispatch, anonymous Streamable HTTP transport body. Phase 2 OAuth work (TokenStore namespacing, provider catalog, paste-back flow primitives, AuthEntry split, parse_paste_callback) is preserved on feat/openab-agent-mcp-oauth-stash and will be proposed as a separate PR once #959 lands. Per shaun-agent auto-screen Balanced recommendation: smaller PRs get reviewed; one mega-PR doesn't. 
Co-Authored-By: Claude Opus 4.7 --- openab-agent/src/auth.rs | 330 ++-------------------------- openab-agent/src/mcp/config.rs | 135 +----------- openab-agent/src/mcp/flow.rs | 219 ------------------- openab-agent/src/mcp/meta_tool.rs | 1 - openab-agent/src/mcp/mod.rs | 2 - openab-agent/src/mcp/oauth.rs | 351 ------------------------------ openab-agent/src/mcp/runtime.rs | 320 ++------------------------- 7 files changed, 40 insertions(+), 1318 deletions(-) delete mode 100644 openab-agent/src/mcp/flow.rs delete mode 100644 openab-agent/src/mcp/oauth.rs diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index c1cda68a2..385ccede9 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -2,16 +2,11 @@ use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; -use std::collections::HashMap; use std::io::{BufRead, Write}; use std::net::TcpListener; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::time::{SystemTime, UNIX_EPOCH}; -/// Namespace key for the existing Codex single-tenant credential. -/// Lives next to future `mcp:` entries inside `auth.json`. -const CODEX_NAMESPACE: &str = "codex"; - const REFRESH_SKEW_SECONDS: u64 = 120; const CODEX_AUTHORIZE_URL: &str = "https://auth.openai.com/oauth/authorize"; @@ -39,42 +34,7 @@ pub struct TokenStore { pub provider: String, } -/// Transient per-server state captured at `start_paste_login` and consumed -/// by `complete_login` (ADR §6.4). Lives in `auth.json` under -/// `mcp-pending:`. `token_url` + `provider_name` are snapshotted -/// up front so a config edit between init and finish can't redirect the -/// token exchange. -/// -/// Unconditionally compiled (not behind `mcp` feature) so a non-mcp build -/// can still parse + round-trip an `auth.json` containing pending entries. 
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -pub struct PendingPasteLogin { - pub verifier: String, - pub state: String, - pub token_url: String, - pub provider_name: String, -} - -/// `auth.json` value type. Untagged Serde enum: `TokenStore` has required -/// `access_token`, `PendingPasteLogin` has required `verifier` — the -/// shapes are disjoint, so deserialization picks the right variant -/// without an explicit tag (and existing files stay byte-compatible). -/// -/// Per Mira's Tick 39 review: option-A (repurposing TokenStore fields for -/// pending state) would have made the refresh task treat pending entries -/// as "expired tokens" and loop on them. The untagged enum keeps the two -/// state machines completely separate. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(untagged)] -pub enum AuthEntry { - Token(TokenStore), - Pending(PendingPasteLogin), -} - -/// Default location of `auth.json`. Exposed so `McpRuntimeManager` can -/// thread the same path into its constructor and tests can inject a -/// tempdir without touching `$HOME` (which would race cross-module). -pub fn auth_path() -> PathBuf { +fn auth_path() -> PathBuf { let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string()); PathBuf::from(home) .join(".openab") @@ -82,36 +42,23 @@ pub fn auth_path() -> PathBuf { .join("auth.json") } -/// Read the `auth.json` map, transparently migrating a legacy single-tenant -/// Codex token file into the new namespaced shape. The migrated map is held -/// in-memory only; the file is rewritten in the new shape on the next save. -/// -/// Discriminates by the top-level `access_token` key — present means the -/// file is the legacy `TokenStore` shape, absent means the new namespaced -/// map. A single JSON parse gives accurate error context either way. 
-fn read_auth_file(path: &Path) -> Result> { - let data = std::fs::read_to_string(path)?; - let value: serde_json::Value = - serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}"))?; - if value.get("access_token").is_some() { - let legacy: TokenStore = serde_json::from_value(value) - .map_err(|e| anyhow!("Invalid auth.json (legacy format): {e}"))?; - let mut map = HashMap::new(); - map.insert(CODEX_NAMESPACE.to_string(), AuthEntry::Token(legacy)); - return Ok(map); - } - serde_json::from_value(value).map_err(|e| anyhow!("Invalid auth.json: {e}")) +pub fn load_tokens() -> Result { + let path = auth_path(); + let data = std::fs::read_to_string(&path).map_err(|_| { + anyhow!( + "No credentials found at {}. Run `openab-agent auth codex-oauth` first.", + path.display() + ) + })?; + serde_json::from_str(&data).map_err(|e| anyhow!("Invalid auth.json: {e}")) } -/// Atomically replace `auth.json` with the new map. `fsync(2)` after write -/// satisfies the ADR §6.1 refresh-token rotation contract — without it, a -/// Spot interruption between local write and S3 sync would restore a -/// revoked refresh token from durable storage on the next task start. -fn write_auth_file(path: &Path, map: &HashMap) -> Result<()> { +fn save_tokens(store: &TokenStore) -> Result<()> { + let path = auth_path(); if let Some(dir) = path.parent() { std::fs::create_dir_all(dir)?; } - let data = serde_json::to_string_pretty(map)?; + let data = serde_json::to_string_pretty(store)?; #[cfg(unix)] { use std::fs::OpenOptions; @@ -122,134 +69,16 @@ fn write_auth_file(path: &Path, map: &HashMap) -> Result<()> .create(true) .truncate(true) .mode(0o600) - .open(path)?; + .open(&path)?; file.write_all(data.as_bytes())?; - file.sync_all()?; } #[cfg(not(unix))] { - std::fs::write(path, &data)?; + std::fs::write(&path, &data)?; } Ok(()) } -pub fn load_tokens() -> Result { - let path = auth_path(); - let map = read_auth_file(&path).map_err(|_| { - anyhow!( - "No credentials found at {}. 
Run `openab-agent auth codex-oauth` first.", - path.display() - ) - })?; - match map.get(CODEX_NAMESPACE) { - Some(AuthEntry::Token(t)) => Ok(t.clone()), - _ => Err(anyhow!( - "No codex credentials in {}. Run `openab-agent auth codex-oauth` first.", - path.display() - )), - } -} - -fn save_tokens(store: &TokenStore) -> Result<()> { - let path = auth_path(); - let mut map = read_auth_file(&path).unwrap_or_default(); - map.insert(CODEX_NAMESPACE.to_string(), AuthEntry::Token(store.clone())); - write_auth_file(&path, &map) -} - -/// Look up the credential at `key` (e.g. `mcp:linear`). Returns the codex -/// entry for `key = "codex"`, but prefer `load_tokens()` for that path — -/// this helper exists for MCP server-namespaced lookups (ADR §6.1). -#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) -pub fn load_namespaced_token(key: &str) -> Result { - let path = auth_path(); - let map = - read_auth_file(&path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; - match map.get(key) { - Some(AuthEntry::Token(t)) => Ok(t.clone()), - Some(AuthEntry::Pending(_)) => Err(anyhow!("{key:?} is a pending login, not a token")), - None => Err(anyhow!("no credentials stored for {key:?}")), - } -} - -/// Insert or replace the credential at `key`, preserving all other entries. -/// Read-modify-write on a single file: callers in the same process must -/// serialize themselves (the lifecycle manager already does per ADR §5.7). -#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (mcp/oauth.rs login flow) -pub fn save_namespaced_token(key: &str, store: &TokenStore) -> Result<()> { - let path = auth_path(); - let mut map = read_auth_file(&path).unwrap_or_default(); - map.insert(key.to_string(), AuthEntry::Token(store.clone())); - write_auth_file(&path, &map) -} - -/// Read a `mcp-pending:` entry from `auth.json` (ADR §6.4). 
Errors -/// if the key holds a token instead — the two namespaces shouldn't -/// collide, but a hand-edited file would. `path` is injected so the -/// runtime manager can point tests at a tempdir; production callers pass -/// `auth_path()`. -#[cfg(feature = "mcp")] -pub fn load_pending_login(path: &Path, key: &str) -> Result { - let map = - read_auth_file(path).map_err(|_| anyhow!("No credentials found at {}", path.display()))?; - match map.get(key) { - Some(AuthEntry::Pending(p)) => Ok(p.clone()), - Some(AuthEntry::Token(_)) => Err(anyhow!("{key:?} is a token, not a pending login")), - None => Err(anyhow!("no pending login for {key:?}")), - } -} - -/// Persist a `PendingPasteLogin` under `mcp-pending:` (ADR §6.4). -/// Read-modify-write — same serialization caveat as `save_namespaced_token`. -#[cfg(feature = "mcp")] -pub fn save_pending_login(path: &Path, key: &str, val: &PendingPasteLogin) -> Result<()> { - let mut map = read_auth_file(path).unwrap_or_default(); - map.insert(key.to_string(), AuthEntry::Pending(val.clone())); - write_auth_file(path, &map) -} - -/// Remove a pending-login entry (consumed on successful `complete_login`, -/// expired entry GC, or `mcp logout`). Idempotent — missing key is OK. -#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (complete_login) -pub fn remove_pending_login(path: &Path, key: &str) -> Result<()> { - let mut map = match read_auth_file(path) { - Ok(m) => m, - Err(_) => return Ok(()), - }; - if map.remove(key).is_none() { - return Ok(()); - } - if map.is_empty() { - let _ = std::fs::remove_file(path); - return Ok(()); - } - write_auth_file(path, &map) -} - -/// Remove the credential at `key`. Idempotent — missing key is not an -/// error. If the map becomes empty, the file is deleted so `mcp doctor` -/// can report "no credentials" instead of "empty file". 
-#[cfg(feature = "mcp")] -#[allow(dead_code)] // wired in next slice (mcp logout / revoked-refresh recovery) -pub fn remove_namespaced_token(key: &str) -> Result<()> { - let path = auth_path(); - let mut map = match read_auth_file(&path) { - Ok(m) => m, - Err(_) => return Ok(()), - }; - if map.remove(key).is_none() { - return Ok(()); - } - if map.is_empty() { - let _ = std::fs::remove_file(&path); - return Ok(()); - } - write_auth_file(&path, &map) -} - fn is_expired(store: &TokenStore) -> bool { let now = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -309,7 +138,7 @@ async fn refresh_token(store: &TokenStore) -> Result { }) } -pub fn generate_pkce() -> (String, String) { +fn generate_pkce() -> (String, String) { let mut buf = [0u8; 32]; getrandom::fill(&mut buf).expect("getrandom failed"); let verifier = URL_SAFE_NO_PAD.encode(buf); @@ -706,129 +535,4 @@ mod tests { let expected = URL_SAFE_NO_PAD.encode(Sha256::digest(verifier.as_bytes())); assert_eq!(challenge, expected); } - - fn token_of(entry: Option<&AuthEntry>) -> &TokenStore { - match entry { - Some(AuthEntry::Token(t)) => t, - other => panic!("expected Token, got {other:?}"), - } - } - - #[test] - fn read_auth_file_migrates_legacy_single_tenant_format() { - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let legacy = serde_json::to_string_pretty(&make_store(9_999_999_999)).unwrap(); - std::fs::write(&path, legacy).unwrap(); - let map = read_auth_file(&path).unwrap(); - assert_eq!(map.len(), 1); - assert_eq!( - token_of(map.get(CODEX_NAMESPACE)).access_token, - "test_access_token_value" - ); - } - - #[test] - fn read_auth_file_parses_new_namespaced_format() { - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let mut input = HashMap::new(); - input.insert("codex".to_string(), AuthEntry::Token(make_store(1))); - input.insert("mcp:linear".to_string(), AuthEntry::Token(make_store(2))); - write_auth_file(&path, &input).unwrap(); - let 
map = read_auth_file(&path).unwrap(); - assert_eq!(map.len(), 2); - assert_eq!(token_of(map.get("codex")).expires_at, 1); - assert_eq!(token_of(map.get("mcp:linear")).expires_at, 2); - } - - #[test] - fn write_auth_file_round_trips_through_disk() { - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let mut input = HashMap::new(); - input.insert("mcp:github".to_string(), AuthEntry::Token(make_store(42))); - write_auth_file(&path, &input).unwrap(); - let raw = std::fs::read_to_string(&path).unwrap(); - assert!(raw.contains("mcp:github")); - let map = read_auth_file(&path).unwrap(); - assert_eq!(token_of(map.get("mcp:github")).expires_at, 42); - } - - #[cfg(unix)] - #[test] - fn write_auth_file_creates_file_with_0600_mode() { - use std::os::unix::fs::PermissionsExt; - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let mut input = HashMap::new(); - input.insert("codex".to_string(), AuthEntry::Token(make_store(0))); - write_auth_file(&path, &input).unwrap(); - let mode = std::fs::metadata(&path).unwrap().permissions().mode() & 0o777; - assert_eq!(mode, 0o600, "expected 0600, got {mode:o}"); - } - - fn make_pending() -> PendingPasteLogin { - PendingPasteLogin { - verifier: "test-verifier".to_string(), - state: "test-state".to_string(), - token_url: "https://example.com/token".to_string(), - provider_name: "anthropic-mcp".to_string(), - } - } - - #[test] - fn auth_entry_untagged_round_trip_mixed_shapes() { - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let mut input = HashMap::new(); - input.insert("codex".to_string(), AuthEntry::Token(make_store(1))); - input.insert( - "mcp-pending:linear".to_string(), - AuthEntry::Pending(make_pending()), - ); - write_auth_file(&path, &input).unwrap(); - let map = read_auth_file(&path).unwrap(); - assert_eq!(map.len(), 2); - assert_eq!(token_of(map.get("codex")).expires_at, 1); - match map.get("mcp-pending:linear") { - 
Some(AuthEntry::Pending(p)) => assert_eq!(p.verifier, "test-verifier"), - other => panic!("expected Pending, got {other:?}"), - } - } - - #[cfg(feature = "mcp")] - #[test] - fn pending_login_helpers_round_trip_via_injected_path() { - // Tempdir path injected directly — no HOME-env shimming, so this - // test can't race auth-touching tests in other modules. - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let key = "mcp-pending:test-srv"; - save_pending_login(&path, key, &make_pending()).unwrap(); - let got = load_pending_login(&path, key).unwrap(); - assert_eq!(got, make_pending()); - remove_pending_login(&path, key).unwrap(); - assert!(load_pending_login(&path, key).is_err()); - } - - #[cfg(feature = "mcp")] - #[test] - fn load_namespaced_token_errors_on_pending_entry() { - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("auth.json"); - let mut input = HashMap::new(); - input.insert( - "mcp-pending:srv".to_string(), - AuthEntry::Pending(make_pending()), - ); - write_auth_file(&path, &input).unwrap(); - let map = read_auth_file(&path).unwrap(); - // Assert the discriminant directly. `load_namespaced_token` would - // reach into the real `$HOME/.openab/agent/auth.json` and race - // cross-module tests; the variant check is the actual property - // under test. - let pending = map.get("mcp-pending:srv"); - assert!(matches!(pending, Some(AuthEntry::Pending(_)))); - } } diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 003bb5310..742459430 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -57,46 +57,15 @@ pub struct ToolFilter { pub exclude: Vec, } -/// OAuth block. -/// -/// `provider` selects a built-in spec from `oauth::builtin()`. Setting it -/// to an unknown name + supplying `authorize_url` / `token_url` defines a -/// custom OAuth 2.1 provider (ADR §6.3). 
`discovery: true` opts into -/// RFC 8414 dynamic discovery and requires a non-empty -/// `discovery_allowlist` of domains (§6.4 SSRF guard). -#[derive(Debug, Default, Clone, Serialize, Deserialize)] +/// OAuth block. Phase 1 only parses `provider` + `scopes`; custom-provider +/// fields (§6.3: `authorize_url`, `token_url`, `device_authorization_endpoint`, +/// `discovery`, `discovery_allowlist`) land with the Phase 2 auth slice. +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct OAuthConfig { #[serde(default)] pub provider: Option, #[serde(default)] pub scopes: Vec, - #[serde(default)] - pub authorize_url: Option, - #[serde(default)] - pub token_url: Option, - #[serde(default)] - pub client_id: Option, - #[serde(default)] - pub device_authorization_endpoint: Option, - #[serde(default)] - pub discovery: bool, - #[serde(default)] - pub discovery_allowlist: Vec, -} - -impl OAuthConfig { - /// Boot-time validation (ADR §6.3 / §6.4). `discovery: true` without an - /// explicit allowlist is rejected — RFC 8414 lookups in multi-tenant - /// deployments would otherwise become an SSRF vector. - pub fn validate(&self, server: &str) -> Result<()> { - if self.discovery && self.discovery_allowlist.is_empty() { - return Err(anyhow!( - "mcp server {server:?}: oauth.discovery=true requires \ - a non-empty oauth.discovery_allowlist (ADR §6.3)" - )); - } - Ok(()) - } } impl McpConfig { @@ -120,24 +89,9 @@ impl McpConfig { let layer = Self::load_file(path)?; merged.servers.extend(layer.servers); } - merged.validate()?; Ok(merged) } - /// Validate every server's `oauth` block (ADR §6.3 boot check). Returns - /// the first failure — finer-grained per-server isolation lives in §5.6. - pub fn validate(&self) -> Result<()> { - for (name, server) in &self.servers { - if let ServerConfig::Http { - oauth: Some(oauth), .. 
- } = server - { - oauth.validate(name)?; - } - } - Ok(()) - } - fn load_file(path: &Path) -> Result { let raw = std::fs::read_to_string(path) .with_context(|| format!("read mcp config {}", path.display()))?; @@ -333,85 +287,4 @@ mod tests { _ => unreachable!(), } } - - #[test] - fn parses_custom_oauth_provider_fields() { - let json = r#"{ - "mcpServers": { - "custom": { - "type": "http", - "url": "https://example.com/mcp", - "oauth": { - "provider": "custom", - "authorize_url": "https://example.com/oauth/authorize", - "token_url": "https://example.com/oauth/token", - "client_id": "abc123", - "device_authorization_endpoint": "https://example.com/oauth/device", - "discovery": true, - "discovery_allowlist": ["*.example.com"] - } - } - } - }"#; - let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let ServerConfig::Http { - oauth: Some(oauth), .. - } = cfg.servers.get("custom").unwrap() - else { - panic!("expected http with oauth"); - }; - assert_eq!( - oauth.authorize_url.as_deref(), - Some("https://example.com/oauth/authorize"), - ); - assert_eq!( - oauth.token_url.as_deref(), - Some("https://example.com/oauth/token"), - ); - assert_eq!(oauth.client_id.as_deref(), Some("abc123")); - assert_eq!( - oauth.device_authorization_endpoint.as_deref(), - Some("https://example.com/oauth/device"), - ); - assert!(oauth.discovery); - assert_eq!(oauth.discovery_allowlist, vec!["*.example.com".to_string()]); - } - - #[test] - fn validate_rejects_discovery_without_allowlist() { - let oauth = OAuthConfig { - provider: Some("custom".into()), - discovery: true, - ..Default::default() - }; - let err = oauth.validate("srv").unwrap_err().to_string(); - assert!(err.contains("discovery_allowlist"), "got: {err}"); - assert!(err.contains("srv"), "got: {err}"); - } - - #[test] - fn validate_accepts_discovery_with_allowlist() { - let oauth = OAuthConfig { - provider: Some("custom".into()), - discovery: true, - discovery_allowlist: vec!["*.example.com".into()], - ..Default::default() - 
}; - oauth.validate("srv").unwrap(); - } - - #[test] - fn load_layered_rejects_invalid_discovery_config() { - let dir = tempfile::tempdir().unwrap(); - let project = dir.path().join("project.json"); - std::fs::write( - &project, - r#"{"mcpServers":{"bad":{"type":"http","url":"https://example.com","oauth":{"provider":"custom","discovery":true}}}}"#, - ) - .unwrap(); - let err = McpConfig::load_layered(None, Some(&project)) - .unwrap_err() - .to_string(); - assert!(err.contains("discovery_allowlist"), "got: {err}"); - } } diff --git a/openab-agent/src/mcp/flow.rs b/openab-agent/src/mcp/flow.rs deleted file mode 100644 index 7d0fd7c80..000000000 --- a/openab-agent/src/mcp/flow.rs +++ /dev/null @@ -1,219 +0,0 @@ -//! OAuth 2.1 paste-back flow primitives (ADR §6.4). PKCE comes from -//! `crate::auth::generate_pkce` — shared with the Codex paths so a -//! security-primitive change can't drift between modules. Device -//! polling orchestration lands in a subsequent slice. - -use anyhow::{anyhow, Result}; -use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine}; -use url::Url; - -use super::oauth::ResolvedProvider; -use crate::auth::generate_pkce; - -/// 16-byte URL-safe `state` nonce for the OAuth authorize URL. -fn generate_state() -> String { - let mut buf = [0u8; 16]; - getrandom::fill(&mut buf).expect("getrandom failed"); - URL_SAFE_NO_PAD.encode(buf) -} - -/// Result of `init_paste_authorize`: the URL to surface to the user, plus -/// the `code_verifier` + `state` the caller must persist under the -/// pending-login key for `complete_login` to validate the callback. -pub struct PasteAuthorize { - pub url: String, - pub code_verifier: String, - pub state: String, -} - -/// Start a paste-back OAuth 2.1 authorize flow. Generates the PKCE pair -/// and state nonce internally so the caller can't pair them up wrong; -/// builds the RFC 6749 authorize URL with `S256` PKCE and space-joined -/// scopes. 
`client_id` is caller-supplied: built-ins look it up via a -/// hard-coded helper (mirroring `auth::codex_client_id`); custom -/// providers carry it on `ResolvedProvider::Custom`. `redirect_uri` is -/// the provider's pinned callback for built-ins or a runtime-bound -/// `localhost:` for custom paste-back flows. -pub fn init_paste_authorize( - provider: &ResolvedProvider, - client_id: &str, - redirect_uri: &str, -) -> Result { - let (code_verifier, code_challenge) = generate_pkce(); - let state = generate_state(); - let mut url = Url::parse(provider.authorize_url())?; - url.query_pairs_mut() - .append_pair("response_type", "code") - .append_pair("client_id", client_id) - .append_pair("redirect_uri", redirect_uri) - .append_pair("code_challenge", &code_challenge) - .append_pair("code_challenge_method", "S256") - .append_pair("state", &state) - .append_pair("scope", &provider.scopes().join(" ")); - Ok(PasteAuthorize { - url: url.to_string(), - code_verifier, - state, - }) -} - -/// Parse a paste-back callback URL into its authorization `code` after -/// validating the `state` echo. OAuth 2.1 RFC 6749 §10.12 + §4.1.2 — a -/// mismatched `state` indicates CSRF / cross-flow contamination and MUST -/// reject the exchange before any token-endpoint round-trip. Tolerates -/// extra query params (vendor-specific tracking, `iss`, etc.). 
-#[allow(dead_code)] // wired in next slice (runtime::complete_login) -pub fn parse_paste_callback(redirect_url: &str, expected_state: &str) -> Result { - let url = Url::parse(redirect_url).map_err(|e| anyhow!("invalid redirect URL: {e}"))?; - let mut code = None; - let mut state = None; - let mut error = None; - for (k, v) in url.query_pairs() { - match k.as_ref() { - "code" => code = Some(v.into_owned()), - "state" => state = Some(v.into_owned()), - "error" => error = Some(v.into_owned()), - _ => {} - } - } - if let Some(err) = error { - return Err(anyhow!("authorize endpoint returned error: {err}")); - } - let got_state = state.ok_or_else(|| anyhow!("callback missing state"))?; - if got_state != expected_state { - return Err(anyhow!("state mismatch; flow rejected")); - } - code.ok_or_else(|| anyhow!("callback missing code")) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::mcp::config::OAuthConfig; - use crate::mcp::oauth::resolve; - - const TEST_REDIRECT: &str = "http://localhost:53692/callback"; - - #[test] - fn state_is_url_safe_and_unique() { - let s = generate_state(); - let url_safe = s - .chars() - .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_'); - assert!(url_safe); - assert_ne!(s, generate_state()); - } - - fn builtin_provider() -> ResolvedProvider { - let cfg = OAuthConfig { - provider: Some("anthropic-mcp".to_string()), - ..Default::default() - }; - resolve(&cfg).unwrap() - } - - #[test] - fn init_paste_authorize_threads_pkce_and_state_into_url() { - let p = builtin_provider(); - let r = init_paste_authorize(&p, "client-xyz", TEST_REDIRECT).unwrap(); - assert!(r.url.starts_with("https://claude.ai/oauth/authorize?")); - assert!(r.url.contains("response_type=code")); - assert!(r.url.contains("client_id=client-xyz")); - assert!(r.url.contains("code_challenge_method=S256")); - assert!(r.url.contains(&format!("state={}", r.state))); - assert!(!r.code_verifier.is_empty()); - } - - #[test] - fn 
init_paste_authorize_percent_encodes_redirect_uri() { - let p = builtin_provider(); - let r = init_paste_authorize(&p, "c", TEST_REDIRECT).unwrap(); - let want = "redirect_uri=http%3A%2F%2Flocalhost%3A53692%2Fcallback"; - assert!(r.url.contains(want)); - } - - #[test] - fn init_paste_authorize_form_encodes_scope_spaces_as_plus() { - let p = builtin_provider(); - let r = init_paste_authorize(&p, "c", TEST_REDIRECT).unwrap(); - assert!(r.url.contains("scope=org%3Acreate_api_key")); - assert!(r.url.contains("user%3Amcp_servers")); - } - - #[test] - fn init_paste_authorize_rejects_unparseable_authorize_url() { - let cfg = OAuthConfig { - provider: Some("broken".to_string()), - authorize_url: Some("not a url".to_string()), - token_url: Some("https://example.com/token".to_string()), - ..Default::default() - }; - let p = resolve(&cfg).unwrap(); - assert!(init_paste_authorize(&p, "c", TEST_REDIRECT).is_err()); - } - - #[test] - fn init_paste_authorize_for_custom_provider() { - let cfg = OAuthConfig { - provider: Some("linear".to_string()), - authorize_url: Some("https://linear.app/oauth/authorize".to_string()), - token_url: Some("https://api.linear.app/oauth/token".to_string()), - client_id: Some("linear-client".to_string()), - scopes: vec!["read".to_string(), "write".to_string()], - ..Default::default() - }; - let p = resolve(&cfg).unwrap(); - let r = init_paste_authorize(&p, "linear-client", TEST_REDIRECT).unwrap(); - assert!(r.url.starts_with("https://linear.app/oauth/authorize?")); - assert!(r.url.contains("scope=read+write")); - } - - #[test] - fn parse_paste_callback_extracts_code_when_state_matches() { - let url = "http://localhost:53692/callback?code=abc123&state=xyz"; - let code = parse_paste_callback(url, "xyz").unwrap(); - assert_eq!(code, "abc123"); - } - - #[test] - fn parse_paste_callback_tolerates_extra_query_params() { - let url = "http://localhost:53692/cb?iss=https%3A%2F%2Fauth&state=s&code=c&tracking=1"; - let code = parse_paste_callback(url, 
"s").unwrap(); - assert_eq!(code, "c"); - } - - #[test] - fn parse_paste_callback_rejects_state_mismatch() { - let url = "http://localhost:53692/cb?code=c&state=wrong"; - let err = parse_paste_callback(url, "want").unwrap_err().to_string(); - assert!(err.contains("state mismatch"), "got: {err}"); - } - - #[test] - fn parse_paste_callback_rejects_missing_state() { - let url = "http://localhost:53692/cb?code=c"; - let err = parse_paste_callback(url, "x").unwrap_err().to_string(); - assert!(err.contains("missing state"), "got: {err}"); - } - - #[test] - fn parse_paste_callback_rejects_missing_code() { - let url = "http://localhost:53692/cb?state=x"; - let err = parse_paste_callback(url, "x").unwrap_err().to_string(); - assert!(err.contains("missing code"), "got: {err}"); - } - - #[test] - fn parse_paste_callback_surfaces_authorize_error() { - let url = "http://localhost:53692/cb?error=access_denied&state=x"; - let err = parse_paste_callback(url, "x").unwrap_err().to_string(); - assert!(err.contains("access_denied"), "got: {err}"); - } - - #[test] - fn parse_paste_callback_rejects_unparseable_url() { - let url = "not a url"; - let err = parse_paste_callback(url, "x").unwrap_err().to_string(); - assert!(err.contains("invalid redirect URL"), "got: {err}"); - } -} diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 7ecfe0034..557badf4c 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -186,7 +186,6 @@ fn status_label(status: &ServerStatus) -> &'static str { ServerStatus::Disconnected => "disconnected", ServerStatus::Connecting => "connecting", ServerStatus::Connected => "connected", - ServerStatus::NeedsAuth => "needs_auth", ServerStatus::Failed(_) => "failed", } } diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 81278aa4e..55f210c16 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -1,9 +1,7 @@ //! Native MCP client. 
See `docs/adr/openab-agent-mcp.md`. pub mod config; -pub mod flow; pub mod meta_tool; -pub mod oauth; pub mod runtime; use serde_json::json; diff --git a/openab-agent/src/mcp/oauth.rs b/openab-agent/src/mcp/oauth.rs deleted file mode 100644 index f3ea31661..000000000 --- a/openab-agent/src/mcp/oauth.rs +++ /dev/null @@ -1,351 +0,0 @@ -//! OAuth provider catalog (ADR §6.2) + custom-provider resolution (§6.3). -//! Wiring into the rmcp Streamable HTTP transport + agent-guided flows -//! (§6.4) lands in subsequent slices; this module is the data layer the -//! login / refresh code will dispatch through. - -// The §6.4 login slice is the first prod caller — until then, every item -// here is reachable only via the unit tests below, so `cargo clippy -// --features mcp -- -D warnings` would flag them as dead. Module-scope -// allow rather than per-item once that slice lands. -#![allow(dead_code)] - -use anyhow::{anyhow, Result}; - -use super::config::OAuthConfig; - -/// Static description of a single built-in OAuth provider. `default_scopes` -/// is the minimum set the agent will request when `oauth.scopes` is omitted -/// from the server config; per-server overrides win when present. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct ProviderSpec { - pub name: &'static str, - pub authorize_url: &'static str, - pub token_url: &'static str, - pub callback: &'static str, - pub default_scopes: &'static [&'static str], -} - -/// Anthropic MCP (claude.ai). Scope list from ADR §6.2 — `org:create_api_key` -/// is the broadest grant; consumers should narrow via per-server overrides. 
-pub const ANTHROPIC_MCP: ProviderSpec = ProviderSpec { - name: "anthropic-mcp", - authorize_url: "https://claude.ai/oauth/authorize", - token_url: "https://platform.claude.com/v1/oauth/token", - callback: "http://localhost:53692/callback", - default_scopes: &[ - "org:create_api_key", - "user:profile", - "user:inference", - "user:sessions:claude_code", - "user:mcp_servers", - "user:file_upload", - ], -}; - -const BUILTINS: &[ProviderSpec] = &[ANTHROPIC_MCP]; - -/// Look up a built-in `ProviderSpec` by config name. Returns `None` for -/// custom providers (§6.3) and for unknown names. -pub fn builtin(name: &str) -> Option { - BUILTINS.iter().copied().find(|spec| spec.name == name) -} - -/// Resolve a built-in provider's OAuth `client_id`. Mirrors -/// `auth::codex_client_id`'s env-var-override pattern but without a hard- -/// coded default — the Anthropic MCP public client_id isn't yet pinned in -/// this repo, so requiring the env var fails fast with a useful error -/// rather than silently dialing with a placeholder. Replace with a -/// hard-coded default once a real value is published. -pub fn builtin_client_id(provider: &str) -> Result { - let env_var = match provider { - "anthropic-mcp" => "OPENAB_MCP_ANTHROPIC_CLIENT_ID", - other => { - return Err(anyhow!( - "no built-in client_id mapping for provider {other:?}" - )); - } - }; - std::env::var(env_var).map_err(|_| { - anyhow!( - "built-in provider {provider:?} requires env var {env_var} \ - (client_id of the provider's OAuth app)" - ) - }) -} - -/// Effective per-server OAuth parameters after resolving the built-in catalog -/// and `OAuthConfig` overrides. -/// -/// The two variants encode invariants that an `Option`-heavy struct couldn't: -/// built-ins always pin a `callback` (their PKCE port is hard-coded in the -/// provider's app registration) and never carry a `client_id` (the §6.4 flow -/// code owns it, mirroring `auth.rs::codex_client_id()`). 
Custom providers -/// flip both: §6.4 allocates a free port at login time, and `client_id` -/// comes from config (OAuth 2.1 public clients vary on registration). -/// -/// `device_authorization_endpoint` only appears on `Custom` — adding device -/// support for a built-in provider is a `ProviderSpec` schema change, not a -/// config flag. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum ResolvedProvider { - Builtin { - provider_name: &'static str, - authorize_url: &'static str, - token_url: &'static str, - callback: &'static str, - scopes: Vec, - }, - Custom { - provider_name: String, - authorize_url: String, - token_url: String, - client_id: Option, - device_authorization_endpoint: Option, - scopes: Vec, - }, -} - -impl ResolvedProvider { - /// Accessor for the shared `authorize_url` field. Callers that don't - /// need to distinguish built-in vs custom can skip the `match`. - pub fn authorize_url(&self) -> &str { - match self { - Self::Builtin { authorize_url, .. } => authorize_url, - Self::Custom { authorize_url, .. } => authorize_url, - } - } - - /// Accessor for the shared `token_url` field. - pub fn token_url(&self) -> &str { - match self { - Self::Builtin { token_url, .. } => token_url, - Self::Custom { token_url, .. } => token_url, - } - } - - /// Accessor for the shared scope list. - pub fn scopes(&self) -> &[String] { - match self { - Self::Builtin { scopes, .. } | Self::Custom { scopes, .. } => scopes, - } - } -} - -/// Resolve a server's `oauth:` block. Built-in providers come from -/// `builtin()`; unknown providers fall through to the §6.3 custom path, -/// which requires `authorize_url` + `token_url` on the config. -/// -/// `OAuthConfig::scopes`, when non-empty, replaces the built-in defaults -/// entirely — the caller never needs to merge. 
-pub fn resolve(cfg: &OAuthConfig) -> Result { - let provider = cfg - .provider - .as_deref() - .ok_or_else(|| anyhow!("oauth.provider is required"))?; - match builtin(provider) { - Some(spec) => Ok(resolve_builtin(spec, cfg)), - None => resolve_custom(provider, cfg), - } -} - -fn resolve_builtin(spec: ProviderSpec, cfg: &OAuthConfig) -> ResolvedProvider { - let scopes = if cfg.scopes.is_empty() { - spec.default_scopes.iter().map(|s| s.to_string()).collect() - } else { - cfg.scopes.clone() - }; - ResolvedProvider::Builtin { - provider_name: spec.name, - authorize_url: spec.authorize_url, - token_url: spec.token_url, - callback: spec.callback, - scopes, - } -} - -fn resolve_custom(provider: &str, cfg: &OAuthConfig) -> Result { - let authorize_url = cfg.authorize_url.clone().ok_or_else(|| { - anyhow!("custom oauth provider {provider:?}: oauth.authorize_url is required (ADR §6.3)") - })?; - let token_url = cfg.token_url.clone().ok_or_else(|| { - anyhow!("custom oauth provider {provider:?}: oauth.token_url is required (ADR §6.3)") - })?; - Ok(ResolvedProvider::Custom { - provider_name: provider.to_string(), - authorize_url, - token_url, - client_id: cfg.client_id.clone(), - device_authorization_endpoint: cfg.device_authorization_endpoint.clone(), - scopes: cfg.scopes.clone(), - }) -} - -#[cfg(test)] -mod tests { - use super::*; - - // Both env-touching tests below race the same OS env var; serialize - // them per the runbook's Tick 24 lesson (acp.rs ANTHROPIC_API_KEY race). - static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); - - #[test] - fn builtin_client_id_requires_env_var() { - let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - // SAFETY: serialized via ENV_LOCK; isolated env key. 
- unsafe { - std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); - } - let err = builtin_client_id("anthropic-mcp").unwrap_err().to_string(); - assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); - } - - #[test] - fn builtin_client_id_uses_env_var_when_set() { - let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - // SAFETY: serialized via ENV_LOCK; isolated env key. - unsafe { - std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-test-id"); - } - let id = builtin_client_id("anthropic-mcp").unwrap(); - assert_eq!(id, "anth-test-id"); - unsafe { - std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); - } - } - - #[test] - fn builtin_client_id_rejects_unknown_provider() { - let err = builtin_client_id("does-not-exist").unwrap_err().to_string(); - assert!(err.contains("does-not-exist"), "got: {err}"); - } - - #[test] - fn anthropic_mcp_spec_matches_adr_table() { - let spec = builtin("anthropic-mcp").expect("anthropic-mcp is built-in"); - assert_eq!(spec.authorize_url, "https://claude.ai/oauth/authorize"); - assert_eq!(spec.token_url, "https://platform.claude.com/v1/oauth/token"); - assert_eq!(spec.callback, "http://localhost:53692/callback"); - assert!(spec.default_scopes.contains(&"user:mcp_servers")); - } - - #[test] - fn unknown_provider_returns_none() { - assert!(builtin("does-not-exist").is_none()); - assert!(builtin("").is_none()); - } - - #[test] - fn resolve_builtin_uses_default_scopes_when_config_omits_them() { - let cfg = OAuthConfig { - provider: Some("anthropic-mcp".to_string()), - ..Default::default() - }; - let ResolvedProvider::Builtin { - provider_name, - callback, - scopes, - .. 
- } = resolve(&cfg).unwrap() - else { - panic!("expected Builtin variant"); - }; - assert_eq!(provider_name, "anthropic-mcp"); - assert_eq!(callback, ANTHROPIC_MCP.callback); - assert_eq!(scopes.len(), ANTHROPIC_MCP.default_scopes.len()); - } - - #[test] - fn resolve_builtin_uses_config_scopes_when_provided() { - let cfg = OAuthConfig { - provider: Some("anthropic-mcp".to_string()), - scopes: vec!["user:profile".to_string(), "user:inference".to_string()], - ..Default::default() - }; - let r = resolve(&cfg).unwrap(); - assert_eq!(r.scopes(), &["user:profile", "user:inference"]); - } - - #[test] - fn resolve_rejects_missing_provider() { - let err = resolve(&OAuthConfig::default()).unwrap_err().to_string(); - assert!(err.contains("required"), "got: {err}"); - } - - #[test] - fn resolve_custom_uses_config_urls_and_propagates_device_endpoint() { - let cfg = OAuthConfig { - provider: Some("linear".to_string()), - authorize_url: Some("https://linear.app/oauth/authorize".to_string()), - token_url: Some("https://api.linear.app/oauth/token".to_string()), - client_id: Some("client-abc".to_string()), - device_authorization_endpoint: Some("https://linear.app/oauth/device".to_string()), - scopes: vec!["read".to_string(), "write".to_string()], - ..Default::default() - }; - let ResolvedProvider::Custom { - provider_name, - authorize_url, - token_url, - client_id, - device_authorization_endpoint, - scopes, - } = resolve(&cfg).unwrap() - else { - panic!("expected Custom variant"); - }; - assert_eq!(provider_name, "linear"); - assert_eq!(authorize_url, "https://linear.app/oauth/authorize"); - assert_eq!(token_url, "https://api.linear.app/oauth/token"); - assert_eq!(client_id.as_deref(), Some("client-abc")); - assert_eq!( - device_authorization_endpoint.as_deref(), - Some("https://linear.app/oauth/device"), - ); - assert_eq!(scopes, vec!["read", "write"]); - } - - #[test] - fn resolve_custom_minimal_two_urls_only() { - let cfg = OAuthConfig { - provider: Some("acme".to_string()), - 
authorize_url: Some("https://acme.example/authorize".to_string()), - token_url: Some("https://acme.example/token".to_string()), - ..Default::default() - }; - let ResolvedProvider::Custom { - client_id, - device_authorization_endpoint, - scopes, - .. - } = resolve(&cfg).unwrap() - else { - panic!("expected Custom variant"); - }; - assert!(client_id.is_none()); - assert!(device_authorization_endpoint.is_none()); - assert!(scopes.is_empty()); - } - - #[test] - fn resolve_custom_rejects_missing_authorize_url() { - let cfg = OAuthConfig { - provider: Some("custom".to_string()), - token_url: Some("https://example.com/token".to_string()), - ..Default::default() - }; - let err = resolve(&cfg).unwrap_err().to_string(); - assert!(err.contains("authorize_url"), "got: {err}"); - assert!(err.contains("custom"), "got: {err}"); - } - - #[test] - fn resolve_custom_rejects_missing_token_url() { - let cfg = OAuthConfig { - provider: Some("custom".to_string()), - authorize_url: Some("https://example.com/authorize".to_string()), - ..Default::default() - }; - let err = resolve(&cfg).unwrap_err().to_string(); - assert!(err.contains("token_url"), "got: {err}"); - } -} diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 9e8a517e1..933fd23d5 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -13,7 +13,6 @@ //! the duration of a child-process spawn + handshake. 
use std::collections::HashMap; -use std::path::PathBuf; use std::sync::Arc; use anyhow::{anyhow, Context, Result}; @@ -24,16 +23,13 @@ use tokio::process::Command; use tokio::sync::RwLock; use super::config::{McpConfig, ServerConfig}; -use super::flow::init_paste_authorize; -use super::oauth::{builtin_client_id, resolve, ResolvedProvider}; -use crate::auth::{auth_path, load_pending_login, save_pending_login, PendingPasteLogin}; +#[allow(dead_code)] // NeedsAuth lands with the Phase 2 OAuth slice (ADR §5.7) #[derive(Debug, Clone, PartialEq, Eq)] pub enum ServerStatus { Disconnected, Connecting, Connected, - NeedsAuth, Failed(String), } @@ -43,7 +39,6 @@ impl ServerStatus { ServerStatus::Disconnected => "○", ServerStatus::Connecting => "◐", ServerStatus::Connected => "●", - ServerStatus::NeedsAuth => "◌", ServerStatus::Failed(_) => "✗", } } @@ -71,33 +66,15 @@ impl std::fmt::Debug for ServerHandle { } } -/// Public return of `start_paste_login`. The caller relays `authorize_url` -/// to the user; `state` is echoed so the agent can show / log it without -/// reaching into runtime internals. -#[derive(Debug, Clone)] -#[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) -pub struct PasteLoginStart { - pub authorize_url: String, - pub state: String, -} - /// Owns one `ServerHandle` per configured server, behind an async `RwLock` /// so the foreground LLM path and the background eviction task can share it. -#[derive(Debug, Clone)] +#[derive(Debug, Default, Clone)] pub struct McpRuntimeManager { handles: Arc>>, - /// `auth.json` location used for `mcp-pending:` persistence. - /// Injectable so tests can point at a tempdir instead of `$HOME`, - /// avoiding cross-module HOME-env races (Tick 24 lesson + ADR §6.4). 
- auth_path: PathBuf, } impl McpRuntimeManager { pub fn from_config(cfg: McpConfig) -> Self { - Self::from_config_with_auth_path(cfg, auth_path()) - } - - pub fn from_config_with_auth_path(cfg: McpConfig, auth_path: PathBuf) -> Self { let handles: HashMap<_, _> = cfg .servers .into_iter() @@ -113,7 +90,6 @@ impl McpRuntimeManager { .collect(); Self { handles: Arc::new(RwLock::new(handles)), - auth_path, } } @@ -170,93 +146,9 @@ impl McpRuntimeManager { out } - /// Begin a paste-back OAuth login for an HTTP server with an `oauth:` - /// block (ADR §6.4). Produces the authorize URL the agent surfaces to - /// the user; the matching PKCE verifier + `state` nonce are persisted - /// under `mcp-pending:` in `auth.json` for `complete_login` - /// (next slice) to consume. - /// - /// Scoped to **built-in** providers this slice. Custom-provider - /// paste-back needs runtime port allocation for the callback (§6.4), - /// and any provider that advertises a `device_authorization_endpoint` - /// should run device-code instead (§6.4 selection logic). Both errors - /// are explicit so the LLM can pick a different action. - #[allow(dead_code)] // wired in next slice (mcp::login meta-tool action) - pub async fn start_paste_login(&self, name: &str) -> Result { - let oauth_cfg = { - let guard = self.handles.read().await; - let handle = guard - .get(name) - .ok_or_else(|| anyhow!("no mcp server named {name:?}"))?; - match handle.config.resolved(name)? { - ServerConfig::Http { - oauth: Some(oauth), .. - } => oauth, - ServerConfig::Http { oauth: None, .. } => { - return Err(anyhow!("mcp server {name:?} has no oauth block")); - } - ServerConfig::Stdio { .. } => { - return Err(anyhow!("mcp server {name:?} is stdio, not http+oauth")); - } - } - }; - - let provider = resolve(&oauth_cfg)?; - let (client_id, redirect_uri) = match &provider { - ResolvedProvider::Builtin { - provider_name, - callback, - .. 
- } => (builtin_client_id(provider_name)?, (*callback).to_string()), - ResolvedProvider::Custom { - device_authorization_endpoint: Some(_), - .. - } => { - return Err(anyhow!( - "mcp server {name:?} has a device endpoint; use device flow" - )); - } - ResolvedProvider::Custom { .. } => { - return Err(anyhow!( - "mcp server {name:?}: custom-provider paste-back not yet supported" - )); - } - }; - - let started = init_paste_authorize(&provider, &client_id, &redirect_uri)?; - let pending = PendingPasteLogin { - verifier: started.code_verifier, - state: started.state.clone(), - token_url: provider.token_url().to_string(), - provider_name: provider_name_of(&provider), - }; - save_pending_login(&self.auth_path, &pending_key(name), &pending)?; - { - let mut handles = self.handles.write().await; - if let Some(handle) = handles.get_mut(name) { - handle.status = ServerStatus::NeedsAuth; - } - } - Ok(PasteLoginStart { - authorize_url: started.url, - state: started.state, - }) - } - - /// Read the on-disk pending paste-login for `name`. `None` if there's - /// no entry or the file is unreadable; `complete_login` (next slice) - /// is the intended consumer and will distinguish the cases via the - /// `auth::load_pending_login` error message. - #[allow(dead_code)] // first prod caller is complete_login in next slice - pub async fn pending_paste_login(&self, name: &str) -> Option { - load_pending_login(&self.auth_path, &pending_key(name)).ok() - } - /// Lazy-connect the named server (ADR §5.7). Idempotent if already - /// `Connected` with a live client. HTTP servers with an `oauth:` block - /// are routed through `mcp login` first — `connect` marks them - /// `NeedsAuth` and returns an error pointing the caller at the login - /// subcommand rather than attempting an unauthenticated dial. + /// `Connected` with a live client. HTTP servers requiring OAuth are + /// rejected until the Phase 2 auth slice lands (ADR §6). 
pub async fn connect(&self, name: &str) -> Result<()> { let dial = { let mut guard = self.handles.write().await; @@ -271,16 +163,17 @@ impl McpRuntimeManager { ServerConfig::Stdio { command, args, env, .. } => Dial::Stdio { command, args, env }, - // Oauth-protected servers can't be dialed via plain connect; - // mark `NeedsAuth` so `mcp status` shows a persistent - // "waiting for login" signal (vs `Disconnected`, which - // implies a plain `connect` would succeed). The `Failed` - // path remains reserved for dials that were attempted and - // failed at handshake. - ServerConfig::Http { oauth: Some(_), .. } => { - handle.status = ServerStatus::NeedsAuth; + // Reject oauth-protected servers BEFORE the `Connecting` + // transition: we never attempted a handshake, so leaving + // status at `Disconnected` is the honest state. Status + // becomes `Failed` only when a dial was actually tried. + ServerConfig::Http { + oauth: Some(_), + url, + .. + } => { return Err(anyhow!( - "mcp server {name:?} needs oauth login — run `mcp login {name}`" + "oauth-protected http server {url:?} requires the auth slice (Phase 2 §6)" )); } ServerConfig::Http { url, .. } => Dial::Http { url }, @@ -316,20 +209,6 @@ impl McpRuntimeManager { } } -/// Stringified provider name for the pending-state record. `Builtin` keeps -/// its `&'static str` static; `Custom` already owns a `String`. -fn provider_name_of(provider: &ResolvedProvider) -> String { - match provider { - ResolvedProvider::Builtin { provider_name, .. } => (*provider_name).to_string(), - ResolvedProvider::Custom { provider_name, .. } => provider_name.clone(), - } -} - -/// `auth.json` key for an in-flight paste-login (ADR §6.4 namespace). -fn pending_key(name: &str) -> String { - format!("mcp-pending:{name}") -} - /// Per-transport dial parameters, extracted under the manager's write lock /// then dialed without holding the lock. Flat (no nested `*Dial` structs) /// because two variants don't warrant a dispatch enum. 
@@ -419,7 +298,7 @@ mod tests { } #[tokio::test] - async fn connect_http_with_oauth_marks_needs_auth() { + async fn connect_http_with_oauth_defers_to_auth_slice() { let json = r#"{ "mcpServers": { "linear": { @@ -432,33 +311,10 @@ mod tests { let cfg: McpConfig = serde_json::from_str(json).unwrap(); let mgr = McpRuntimeManager::from_config(cfg); let err = mgr.connect("linear").await.unwrap_err().to_string(); - assert!(err.contains("needs oauth login"), "expected hint in {err}"); - assert!( - err.contains("mcp login"), - "expected 'mcp login' hint in {err}" - ); - assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); - } - - #[tokio::test] - async fn connect_oauth_twice_keeps_needs_auth_sticky() { - // Second connect() must NOT silently re-enter `Connecting` and - // shadow the user-actionable state — the only path out of - // `NeedsAuth` is a successful `mcp login`. - let json = r#"{ - "mcpServers": { - "linear": { - "type": "http", - "url": "https://mcp.linear.app/mcp", - "oauth": { "provider": "linear" } - } - } - }"#; - let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); - assert!(mgr.connect("linear").await.is_err()); - assert!(mgr.connect("linear").await.is_err()); - assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); + assert!(err.contains("oauth"), "expected 'oauth' in {err}"); + // OAuth rejection happens BEFORE the Connecting transition, so the + // server remains Disconnected — no dial was attempted. + assert_eq!(mgr.statuses().await[0].1, ServerStatus::Disconnected); } #[tokio::test] @@ -481,144 +337,6 @@ mod tests { } } - // start_paste_login + builtin_client_id race on the same env var. - // Same fix as oauth.rs / acp.rs (Tick 24 lesson). 
- static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); - - fn linear_custom_cfg() -> &'static str { - r#"{ - "mcpServers": { - "linear": { - "type": "http", - "url": "https://mcp.linear.app/mcp", - "oauth": { - "provider": "linear", - "authorize_url": "https://linear.app/oauth/authorize", - "token_url": "https://api.linear.app/oauth/token", - "client_id": "linear-client", - "scopes": ["read"] - } - } - } - }"# - } - - fn anthropic_builtin_cfg() -> &'static str { - r#"{ - "mcpServers": { - "anthro": { - "type": "http", - "url": "https://example.com/mcp", - "oauth": { "provider": "anthropic-mcp" } - } - } - }"# - } - - async fn start_login_err(mgr: &McpRuntimeManager, name: &str) -> String { - mgr.start_paste_login(name).await.unwrap_err().to_string() - } - - fn mgr_with_tempdir(cfg: McpConfig) -> (McpRuntimeManager, tempfile::TempDir) { - let dir = tempfile::tempdir().unwrap(); - let mgr = McpRuntimeManager::from_config_with_auth_path(cfg, dir.path().join("auth.json")); - (mgr, dir) - } - - #[tokio::test] - async fn start_paste_login_builtin_returns_authorize_url_and_pins_pending() { - let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - // SAFETY: serialized via ENV_LOCK; isolated env key. 
- unsafe { - std::env::set_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID", "anth-cid"); - } - let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); - let (mgr, _dir) = mgr_with_tempdir(cfg); - let start = mgr.start_paste_login("anthro").await.unwrap(); - assert!(start - .authorize_url - .starts_with("https://claude.ai/oauth/authorize?")); - assert!(start.authorize_url.contains("client_id=anth-cid")); - assert!(start - .authorize_url - .contains(&format!("state={}", start.state))); - let pending = mgr.pending_paste_login("anthro").await.unwrap(); - assert_eq!(pending.state, start.state); - assert!(!pending.verifier.is_empty()); - assert_eq!( - pending.token_url, - "https://platform.claude.com/v1/oauth/token" - ); - assert_eq!(pending.provider_name, "anthropic-mcp"); - assert_eq!(mgr.statuses().await[0].1, ServerStatus::NeedsAuth); - unsafe { - std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); - } - } - - #[tokio::test] - async fn start_paste_login_rejects_custom_provider_for_now() { - let cfg: McpConfig = serde_json::from_str(linear_custom_cfg()).unwrap(); - let (mgr, _dir) = mgr_with_tempdir(cfg); - let err = start_login_err(&mgr, "linear").await; - assert!(err.contains("custom-provider"), "got: {err}"); - assert!(mgr.pending_paste_login("linear").await.is_none()); - } - - #[tokio::test] - async fn start_paste_login_rejects_custom_with_device_endpoint() { - let json = r#"{ - "mcpServers": { - "dev": { - "type": "http", - "url": "https://example.com/mcp", - "oauth": { - "provider": "dev", - "authorize_url": "https://example.com/oauth/authorize", - "token_url": "https://example.com/oauth/token", - "device_authorization_endpoint": "https://example.com/oauth/device" - } - } - } - }"#; - let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); - let err = start_login_err(&mgr, "dev").await; - assert!(err.contains("device flow"), "got: {err}"); - } - - #[tokio::test] - async fn 
start_paste_login_rejects_stdio_server() { - let json = r#"{ - "mcpServers": { - "fs": { "type": "stdio", "command": "mcp-server-filesystem" } - } - }"#; - let cfg: McpConfig = serde_json::from_str(json).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); - let err = start_login_err(&mgr, "fs").await; - assert!(err.contains("stdio"), "got: {err}"); - } - - #[tokio::test] - async fn start_paste_login_unknown_server_errors() { - let mgr = McpRuntimeManager::from_config(McpConfig::default()); - let err = start_login_err(&mgr, "ghost").await; - assert!(err.contains("ghost"), "got: {err}"); - } - - #[tokio::test] - async fn start_paste_login_builtin_without_env_var_errors_loud() { - let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - unsafe { - std::env::remove_var("OPENAB_MCP_ANTHROPIC_CLIENT_ID"); - } - let cfg: McpConfig = serde_json::from_str(anthropic_builtin_cfg()).unwrap(); - let mgr = McpRuntimeManager::from_config(cfg); - let err = start_login_err(&mgr, "anthro").await; - assert!(err.contains("OPENAB_MCP_ANTHROPIC_CLIENT_ID"), "got: {err}"); - } - #[tokio::test] async fn connect_to_missing_binary_records_failed() { let json = r#"{ From e90ef01a78dc952d46697869dd030509d2f67b5a Mon Sep 17 00:00:00 2001 From: shaun-agent Date: Mon, 1 Jun 2026 09:24:29 +0000 Subject: [PATCH 52/54] fix(openab-agent/mcp): harden env handling --- openab-agent/Cargo.lock | 704 ++++++++++++++++++++++++++++-- openab-agent/Cargo.toml | 1 + openab-agent/src/acp.rs | 44 +- openab-agent/src/auth.rs | 11 +- openab-agent/src/mcp/config.rs | 13 +- openab-agent/src/mcp/meta_tool.rs | 5 +- openab-agent/src/mcp/runtime.rs | 59 ++- 7 files changed, 775 insertions(+), 62 deletions(-) diff --git a/openab-agent/Cargo.lock b/openab-agent/Cargo.lock index 5f878017f..42ed19750 100644 --- a/openab-agent/Cargo.lock +++ b/openab-agent/Cargo.lock @@ -11,6 +11,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "1.0.0" @@ -67,12 +76,29 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic-waker" version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" + [[package]] name = "base64" version = "0.22.1" @@ -85,6 +111,24 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "block-buffer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +dependencies = [ + "hybrid-array", +] + [[package]] name = "bumpalo" version = "3.20.3" @@ -99,9 +143,9 @@ checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "cc" -version = "1.2.62" +version = "1.2.63" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" +checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" dependencies = [ "find-msvc-tools", "shlex", @@ -119,6 +163,20 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + [[package]] name = "clap" version = "4.6.1" @@ -165,11 +223,81 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "crypto-common" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453" +dependencies = [ + "hybrid-array", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer 0.10.4", + "crypto-common 0.1.7", +] + +[[package]] +name = "digest" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" +dependencies = [ + "block-buffer 0.12.0", + "const-oid", + "crypto-common 0.2.2", +] + [[package]] name = "displaydoc" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" dependencies = [ "proc-macro2", "quote", @@ -219,6 +347,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.32" @@ -226,6 +369,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -234,6 +378,40 @@ version = "0.3.32" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + [[package]] name = "futures-task" version = "0.3.32" @@ -246,12 +424,27 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ + "futures-channel", "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -352,11 +545,20 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = 
"hybrid-array" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" +dependencies = [ + "typenum", +] + [[package]] name = "hyper" -version = "1.9.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" +checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498" dependencies = [ "atomic-waker", "bytes", @@ -411,6 +613,30 @@ dependencies = [ "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.2.0" @@ -538,6 +764,25 @@ version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" +[[package]] +name = "is-docker" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928bae27f42bc99b60d9ac7334e3a21d10ad8f1835a4e12ec3ec0464765ed1b3" +dependencies = [ + "once_cell", +] + +[[package]] +name = "is-wsl" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "173609498df190136aa7dea1a91db051746d339e18476eed5ca40521f02d7aa5" +dependencies = [ + "is-docker", + "once_cell", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -624,21 
+869,33 @@ dependencies = [ [[package]] name = "memchr" -version = "2.8.0" +version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" [[package]] name = "mio" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" dependencies = [ "libc", "wasi", "windows-sys 0.61.2", ] +[[package]] +name = "nix" +version = "0.31.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf20d2fde8ff38632c426f1165ed7436270b44f199fc55284c38276f9db47c3d" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -648,6 +905,34 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "oauth2" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51e219e79014df21a225b1860a479e2dcd7cbd9130f4defd4bd0e191ea31d67d" +dependencies = [ + "base64", + "chrono", + "getrandom 0.2.17", + "http", + "rand 0.8.6", + "serde", + "serde_json", + "serde_path_to_error", + "sha2 0.10.9", + "thiserror 1.0.69", + "url", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -660,20 +945,39 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "open" +version = "5.3.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fbaa89d2ddc8473c78a3adf69eea8cffa28c483b8e02a971ef31527cd0fc92c" +dependencies = [ + "is-wsl", + "libc", + "pathdiff", +] + [[package]] name = "openab-agent" version = "0.1.0" dependencies = [ "anyhow", + "base64", "clap", + "getrandom 0.4.2", "libc", - "reqwest", + "open", + "reqwest 0.12.28", + "rmcp", "serde", "serde_json", + "sha2 0.11.0", + "temp-env", "tempfile", "tokio", "tracing", "tracing-subscriber", + "url", + "urlencoding", "uuid", ] @@ -700,6 +1004,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "pathdiff" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" + [[package]] name = "percent-encoding" version = "2.3.2" @@ -749,6 +1059,20 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "process-wrap" +version = "9.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e842efad9119158434d193c6682e2ebee4b44d6ad801d7b349623b3f57cdf55" +dependencies = [ + "futures", + "indexmap", + "nix", + "tokio", + "tracing", + "windows", +] + [[package]] name = "quinn" version = "0.11.9" @@ -763,7 +1087,7 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -778,13 +1102,13 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand", + "rand 0.9.4", "ring", "rustc-hash", "rustls", "rustls-pki-types", "slab", - "thiserror", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -825,14 +1149,35 @@ version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" +[[package]] +name = "rand" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" +dependencies 
= [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + [[package]] name = "rand" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ - "rand_chacha", - "rand_core", + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", ] [[package]] @@ -842,7 +1187,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", ] [[package]] @@ -918,6 +1272,40 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "reqwest" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219c5811de6525e5416c7d5d53bb656d3afdbc6c5af816e0802bcfa42dbdc1c3" +dependencies = [ + "base64", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "sync_wrapper", + "tokio", + "tokio-util", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", +] + [[package]] name = "ring" version = "0.17.14" @@ -932,6 +1320,31 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rmcp" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "0810a9f717d9828f475fe1f629f4c305c8464b7f496c3a854b58d29e65f4058e" +dependencies = [ + "async-trait", + "chrono", + "futures", + "http", + "oauth2", + "pin-project-lite", + "process-wrap", + "reqwest 0.13.4", + "serde", + "serde_json", + "sse-stream", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", + "url", +] + [[package]] name = "rustc-hash" version = "2.1.2" @@ -1053,6 +1466,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -1065,6 +1489,28 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -1076,9 +1522,9 @@ dependencies = [ [[package]] name = "shlex" -version = "1.3.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" [[package]] name = "signal-hook-registry" @@ -1104,14 +1550,27 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" -version = "0.6.3" +version = "0.6.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", "windows-sys 0.61.2", ] +[[package]] +name = "sse-stream" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3962b63f038885f15bce2c6e02c0e7925c072f1ac86bb60fd44c5c6b762fb72" +dependencies = [ + "bytes", + "futures-util", + "http-body", + "http-body-util", + "pin-project-lite", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -1161,6 +1620,15 @@ dependencies = [ "syn", ] +[[package]] +name = "temp-env" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050" +dependencies = [ + "parking_lot", +] + [[package]] name = "tempfile" version = "3.27.0" @@ -1174,13 +1642,33 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -1266,6 +1754,30 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "tower" version = "0.5.3" @@ -1378,6 +1890,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typenum" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -1406,8 +1924,15 @@ dependencies = [ "idna", "percent-encoding", "serde", + "serde_derive", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -1422,9 +1947,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.23.1" +version = "1.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -1437,6 +1962,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "version_check" +version = "0.9.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "want" version = "0.3.1" @@ -1547,6 +2078,19 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "wasm-streams" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasmparser" version = "0.244.0" @@ -1588,12 +2132,107 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "windows" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580" +dependencies = [ + "windows-collections", + "windows-core", + "windows-future", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610" +dependencies = [ + "windows-core", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-future" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb" +dependencies = [ + "windows-core", + "windows-link", + "windows-threading", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-numerics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26" +dependencies = [ + "windows-core", + "windows-link", +] + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -1654,6 +2293,15 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows-threading" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +dependencies = [ + "windows-link", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -1875,18 +2523,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.48" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.48" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639" dependencies = [ "proc-macro2", "quote", diff --git a/openab-agent/Cargo.toml b/openab-agent/Cargo.toml index 72edda354..5b091a47e 100644 --- a/openab-agent/Cargo.toml +++ b/openab-agent/Cargo.toml @@ -37,3 +37,4 @@ mcp = ["dep:rmcp"] [dev-dependencies] tempfile = "3" +temp-env = "0.3.6" diff --git a/openab-agent/src/acp.rs b/openab-agent/src/acp.rs index 5d7f4c412..9585612b2 100644 --- a/openab-agent/src/acp.rs +++ b/openab-agent/src/acp.rs @@ -256,13 +256,6 @@ impl AcpServer { #[cfg(test)] mod tests { use super::*; - use std::sync::Mutex; - - /// Serializes tests that mutate process-global env vars (notably - /// `ANTHROPIC_API_KEY`). Without this, `test_session_new` and - /// `test_session_new_missing_key` race on the same key when run in - /// parallel — set/remove from one thread is observed by the other. 
- static ENV_LOCK: Mutex<()> = Mutex::new(()); #[test] fn test_initialize_response() { @@ -277,11 +270,16 @@ mod tests { #[test] fn test_session_new() { - let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - // Set a fake key so from_env() succeeds in CI - unsafe { std::env::set_var("ANTHROPIC_API_KEY", "test-key") }; - let mut server = AcpServer::new(); - let resp_str = server.handle_session_new(2); + let resp_str = temp_env::with_vars( + [ + ("ANTHROPIC_API_KEY", Some("test-key")), + ("OPENAB_AGENT_PROVIDER", None), + ], + || { + let mut server = AcpServer::new(); + server.handle_session_new(2) + }, + ); let resp: Value = serde_json::from_str(&resp_str).unwrap(); assert_eq!(resp["jsonrpc"], "2.0"); assert_eq!(resp["id"], 2); @@ -290,15 +288,19 @@ mod tests { #[test] fn test_session_new_missing_key() { - let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - // Ensure no OAuth token exists either - let auth_path = - std::path::PathBuf::from(std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string())) - .join(".openab/agent/auth.json"); - let _ = std::fs::remove_file(&auth_path); - unsafe { std::env::remove_var("ANTHROPIC_API_KEY") }; - let mut server = AcpServer::new(); - let resp_str = server.handle_session_new(3); + let tmp = tempfile::TempDir::new().unwrap(); + let home = tmp.path().to_string_lossy().to_string(); + let resp_str = temp_env::with_vars( + [ + ("ANTHROPIC_API_KEY", None), + ("OPENAB_AGENT_PROVIDER", None), + ("HOME", Some(home.as_str())), + ], + || { + let mut server = AcpServer::new(); + server.handle_session_new(3) + }, + ); let resp: Value = serde_json::from_str(&resp_str).unwrap(); assert!(resp["error"].is_object()); assert!(resp["error"]["message"] diff --git a/openab-agent/src/auth.rs b/openab-agent/src/auth.rs index 385ccede9..f34e681aa 100644 --- a/openab-agent/src/auth.rs +++ b/openab-agent/src/auth.rs @@ -517,15 +517,16 @@ mod tests { #[test] fn test_codex_client_id_default() { - unsafe { 
std::env::remove_var("OPENAB_AGENT_OAUTH_CLIENT_ID") }; - assert_eq!(codex_client_id(), "app_EMoamEEZ73f0CkXaXp7hrann"); + temp_env::with_var("OPENAB_AGENT_OAUTH_CLIENT_ID", None::<&str>, || { + assert_eq!(codex_client_id(), "app_EMoamEEZ73f0CkXaXp7hrann"); + }); } #[test] fn test_codex_client_id_override() { - unsafe { std::env::set_var("OPENAB_AGENT_OAUTH_CLIENT_ID", "custom_id") }; - assert_eq!(codex_client_id(), "custom_id"); - unsafe { std::env::remove_var("OPENAB_AGENT_OAUTH_CLIENT_ID") }; + temp_env::with_var("OPENAB_AGENT_OAUTH_CLIENT_ID", Some("custom_id"), || { + assert_eq!(codex_client_id(), "custom_id"); + }); } #[test] diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 742459430..89734917b 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -105,8 +105,12 @@ impl ServerConfig { /// callers should skip the server and continue (ADR §5.6 "per-server /// failure isolated"). `name` is the server name used in error context. pub fn resolved(&self, name: &str) -> Result { + self.resolved_with_env(name, &std::env::vars().collect()) + } + + fn resolved_with_env(&self, name: &str, env: &HashMap) -> Result { let json = serde_json::to_value(self)?; - let resolved = interpolate_value(json, &std::env::vars().collect()) + let resolved = interpolate_value(json, env) .with_context(|| format!("resolve env for mcp server {name:?}"))?; Ok(serde_json::from_value(resolved)?) } @@ -243,17 +247,14 @@ mod tests { #[test] fn resolved_substitutes_env_in_args() { - // SAFETY: single-threaded test; isolated env key. 
- unsafe { - std::env::set_var("MCP_TEST_TOKEN", "secret123"); - } + let env = env(&[("MCP_TEST_TOKEN", "secret123")]); let cfg = ServerConfig::Stdio { command: "github-mcp-server".into(), args: vec!["--token".into(), "${env:MCP_TEST_TOKEN}".into()], env: HashMap::new(), tool_filter: None, }; - match cfg.resolved("github").unwrap() { + match cfg.resolved_with_env("github", &env).unwrap() { ServerConfig::Stdio { args, .. } => { assert_eq!(args[1], "secret123"); } diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 557badf4c..22aafc639 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -149,7 +149,10 @@ async fn status(manager: &McpRuntimeManager, filter: Option<&str>) -> Value { let snapshot = manager.snapshot().await; let entries: Vec = snapshot .into_iter() - .filter(|(name, _, _)| filter.is_none_or(|f| f == name.as_str())) + .filter(|(name, _, _)| match filter { + Some(f) => f == name.as_str(), + None => true, + }) .map(|(name, status, transport)| { let last_error = match &status { ServerStatus::Failed(msg) => Some(msg.clone()), diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 933fd23d5..38d03747f 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -228,8 +228,9 @@ impl Dial { match self { Dial::Stdio { command, args, env } => { let cmd = Command::new(&command).configure(|c| { + c.env_clear(); + c.envs(stdio_child_env(&env)); c.args(&args); - c.envs(&env); }); let transport = TokioChildProcess::new(cmd) .with_context(|| format!("spawn mcp child process {command:?}"))?; @@ -247,6 +248,45 @@ impl Dial { } } +fn stdio_child_env(explicit: &HashMap) -> HashMap { + let mut env = baseline_child_env(); + env.extend(explicit.clone()); + env +} + +fn baseline_child_env() -> HashMap { + let mut env = HashMap::new(); + for key in baseline_env_keys() { + if let Ok(val) = std::env::var(key) { + env.insert((*key).to_string(), val); + 
} + } + env +} + +#[cfg(unix)] +fn baseline_env_keys() -> &'static [&'static str] { + &["HOME", "PATH", "TERM", "USER"] +} + +#[cfg(windows)] +fn baseline_env_keys() -> &'static [&'static str] { + &[ + "HOME", + "PATH", + "TERM", + "USERPROFILE", + "USERNAME", + "SystemRoot", + "SystemDrive", + ] +} + +#[cfg(not(any(unix, windows)))] +fn baseline_env_keys() -> &'static [&'static str] { + &["HOME", "PATH", "TERM"] +} + #[cfg(test)] mod tests { use super::*; @@ -356,4 +396,21 @@ mod tests { other => panic!("expected Failed, got {other:?}"), } } + + #[test] + fn stdio_child_env_keeps_only_baseline_plus_explicit() { + let mut explicit = HashMap::new(); + explicit.insert("MCP_TOKEN".to_string(), "server-token".to_string()); + explicit.insert("PATH".to_string(), "/custom/bin".to_string()); + + let env = stdio_child_env(&explicit); + + assert_eq!( + env.get("MCP_TOKEN").map(String::as_str), + Some("server-token") + ); + assert_eq!(env.get("PATH").map(String::as_str), Some("/custom/bin")); + assert!(!env.contains_key("DISCORD_BOT_TOKEN")); + assert!(!env.contains_key("ANTHROPIC_API_KEY")); + } } From 5af7d86d2ed61c3bd2d10131a59c8e316584a6b7 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Mon, 1 Jun 2026 17:05:29 +0000 Subject: [PATCH 53/54] fix(openab-agent/mcp): address chaodu PR #959 review (F4/F5/F6/F9/F10 + F2/F7 doc) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code changes: - F6: gate `load_runtime_or_warn()` on `OPENAB_AGENT_MCP={1,true,yes,on}` env var. Compile-time feature flag is being removed in Phase 3 PR #969; this adds the runtime half so MCP stays dormant unless explicitly opted in, preventing accidental activation when mcp.json is present. - F4: integration test in `mcp::runtime::tests` exercising two concurrent `connect()` tasks against a guaranteed-failure server, asserts no stuck-Connecting state and that follow-up dials still attempt fresh. 
- F5: defensive guard in `Agent::execute_tool_call` for `mcp` tool calls when manager is unavailable — returns actionable error instead of silently falling through to fs-tool dispatch. - F9: regression test for missing-env-var error path; asserts the offender variable name + server name surface in the chained error message. - F10: `truncate_context` drain bounds clarified — `3.min(len)` instead of `(1 + 2).min(len)`, with comment explaining the preserved first message. Doc changes (ADR §5.4.1 added): - F2: rationale for `env_clear()` baseline + explicit `env:` allowlist — blocks token leakage through ambient env, matches MCP SDK behavior. - F7: rationale for single shared `McpRuntimeManager` per process — connection pooling, deduplicated handshakes, lifecycle owned by Agent. --- docs/adr/openab-agent-mcp.md | 8 +++++++ openab-agent/src/agent.rs | 24 ++++++++++++++++++--- openab-agent/src/mcp/config.rs | 26 ++++++++++++++++++++++ openab-agent/src/mcp/mod.rs | 38 ++++++++++++++++++++++++++++----- openab-agent/src/mcp/runtime.rs | 38 +++++++++++++++++++++++++++++++++ 5 files changed, 126 insertions(+), 8 deletions(-) diff --git a/docs/adr/openab-agent-mcp.md b/docs/adr/openab-agent-mcp.md index fc4c895b3..b0b446ee4 100644 --- a/docs/adr/openab-agent-mcp.md +++ b/docs/adr/openab-agent-mcp.md @@ -359,6 +359,14 @@ openab-agent/src/ Estimated total: **500-750 LOC** (no `reload.rs`; per-session refresh handled by `McpRuntimeManager::new()` re-reading config at session start). `llm.rs` is unchanged because both Anthropic and OpenAI Responses providers consume the generic `ToolDef` abstraction. +#### 5.4.1 Runtime activation & isolation choices + +Three intentional choices that surfaced in PR #959 review (chaodu F2 / F6 / F7) and are load-bearing enough to belong in the design contract: + +1. 
**Runtime opt-in gate (F6, env-only).** `load_runtime_or_warn()` returns `None` unless `OPENAB_AGENT_MCP={1,true,yes,on}` (case-insensitive) is set in the process env, even when `mcp.json` is present. Reasoning: file presence is not a strong enough activation signal — `mcp.json` can land in a deploy tree incidentally (image baseline, project clone) and an unrelated agent shouldn't start spawning third-party child processes. The CLI subcommands (`mcp list / status / connect / doctor`) call `load_config_or_exit` instead and work without the env var so operators can inspect a config before activating it. +2. **Stdio child env scrubbing (F2, intentional security).** `Dial::Stdio` calls `env_clear()` and passes only the 4-var baseline allowlist (`HOME`, `PATH`, `TERM`, `USER` on Unix; Windows equivalents) plus the explicit `env:` map from `mcp.json`. Reasoning: openab-agent inherits high-value secrets from its launcher (`DISCORD_BOT_TOKEN`, `ANTHROPIC_API_KEY`, AWS credentials, GitHub tokens) and stdio MCP servers are third-party binaries with no contractual constraint on what they read from their environment. Leaking those by default is a much larger risk than the convenience of inherited proxy/locale settings. Servers that genuinely need additional env (proxy, certs, locale, provider config) declare them per-server in the config — a future `inherit_env` opt-in list is tracked as follow-up if user demand surfaces. +3. **Per-process shared `McpRuntimeManager` (F7).** A single manager is `Arc`-cloned across all ACP sessions of the same process. Reasoning: MCP servers are expensive to spawn (stdio child fork, HTTP handshake + OAuth) and most are pure-state read-only tools where cross-session visibility is benign. Trade-off: a `mcp connect github` in session A makes the `github` server immediately available in session B. We accept this — per-session isolation would multiply child processes and break the breaker / TTL accounting in §5.7 / §5.9. 
+ ### 5.5 `rmcp` dependency & features ```toml diff --git a/openab-agent/src/agent.rs b/openab-agent/src/agent.rs index 63f240d61..01e4a1bb5 100644 --- a/openab-agent/src/agent.rs +++ b/openab-agent/src/agent.rs @@ -222,8 +222,12 @@ impl Agent { /// first user message and maintaining strict user/assistant alternation. fn truncate_context(&mut self) { while self.messages.len() > MAX_CONTEXT_MESSAGES { - // Drain in pairs (assistant + user) from index 1 to maintain alternation - let end = (1 + 2).min(self.messages.len()); + // Drain a (assistant, user) pair from indices 1..3, preserving + // the original first user message at index 0 so user/assistant + // alternation stays intact. The `min()` guard is defensive — if + // the loop is ever entered with fewer than 3 messages, we drain + // whatever single tail message exists rather than panic. + let end = 3.min(self.messages.len()); self.messages.drain(1..end); } } @@ -233,11 +237,19 @@ impl Agent { /// the routing here (rather than inside `tools.rs`) lets `tools.rs` stay /// stateless and free of MCP/feature plumbing. async fn execute_tool_call(&self, name: &str, input: &serde_json::Value) -> Result { + // Defensive guard (PR #959 chaodu F5): even though the `mcp` tool is + // only registered when a manager is loaded — and the system-prompt + // appendix is gated on `mcp_manager.is_some()` — a sufficiently + // creative LLM could still emit a `mcp(...)` tool call. Surface an + // actionable, non-fatal error so the loop continues instead of + // panicking or leaking an impl-detail message. #[cfg(feature = "mcp")] if name == mcp::MCP_TOOL_NAME { let Some(manager) = self.mcp_manager.as_ref() else { return Err(anyhow::anyhow!( - "mcp tool invoked but no McpRuntimeManager configured" + "tool `mcp` is not available in this session — \ + MCP runtime was not opted in (set `OPENAB_AGENT_MCP=true` \ + and configure `mcp.json`). Do not call `mcp` again." 
)); }; let action = mcp::meta_tool::Action::deserialize(input) @@ -245,6 +257,12 @@ impl Agent { let value = mcp::meta_tool::dispatch(manager, action).await?; return Ok(serde_json::to_string(&value)?); } + #[cfg(not(feature = "mcp"))] + if name == "mcp" { + return Err(anyhow::anyhow!( + "tool `mcp` is not compiled into this build. Do not call `mcp` again." + )); + } tools::execute_tool(name, input, &self.working_dir).await } diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 89734917b..95a79b817 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -204,6 +204,32 @@ mod tests { assert!(interpolate_env("${env:FOO", &e).is_err()); } + #[test] + fn resolved_errors_on_missing_env_var_with_var_name() { + // chaodu F9 (#959 review): contract is that a missing env var + // referenced via `${env:VAR}` in any config field surfaces an error + // naming the offender, so users can fix `mcp.json` instead of + // chasing a generic parse failure. + let cfg = ServerConfig::Stdio { + command: "github-mcp-server".into(), + args: vec!["--token".into(), "${env:CHAODU_F9_MISSING}".into()], + env: HashMap::new(), + tool_filter: None, + }; + let err = format!( + "{:#}", + cfg.resolved_with_env("github", &env(&[])).unwrap_err() + ); + assert!( + err.contains("CHAODU_F9_MISSING"), + "expected missing var name in error: {err}" + ); + assert!( + err.contains("github"), + "expected server name in error context: {err}" + ); + } + #[test] fn parses_stdio_and_http_servers() { let json = r#"{ diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 55f210c16..36502bd40 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -58,18 +58,46 @@ fn load_config_or_exit() -> McpConfig { }) } +/// Runtime opt-in env var (PR #959 review, chaodu F6). MCP stays dormant +/// unless this is explicitly set to a truthy value, even when `mcp.json` +/// exists at one of the search paths. 
Prevents accidental activation in +/// environments where the config file might be present incidentally +/// (e.g. project tree copied into a container image, baseline VM rollouts). +pub const OPT_IN_ENV: &str = "OPENAB_AGENT_MCP"; + +/// Returns `true` when the user has explicitly opted into the MCP runtime +/// via `OPENAB_AGENT_MCP={1,true,yes,on}` (case-insensitive). Any other +/// value — including unset, empty, or `false` — keeps MCP dormant. +fn opted_in() -> bool { + matches!( + std::env::var(OPT_IN_ENV) + .as_deref() + .map(str::to_ascii_lowercase) + .as_deref(), + Ok("1" | "true" | "yes" | "on") + ) +} + /// Construct an `McpRuntimeManager` from on-disk config — returns `None` -/// when no servers are configured so callers can skip the entire MCP path -/// (saves system-prompt tokens + keeps the LLM from hallucinating an empty -/// tool surface). Parse failure falls back to `None` with a `tracing::warn!`. -/// Long-running servers (ACP, future HTTP) call this; CLI subcommands use -/// `load_config_or_exit` instead. +/// when MCP is not opted in (see [`OPT_IN_ENV`]) or no servers are +/// configured, so callers can skip the entire MCP path (saves system-prompt +/// tokens + keeps the LLM from hallucinating an empty tool surface). Parse +/// failure falls back to `None` with a `tracing::warn!`. Long-running +/// servers (ACP, future HTTP) call this; CLI subcommands use +/// `load_config_or_exit` instead so they work without the opt-in env var. 
pub fn load_runtime_or_warn() -> Option { + if !opted_in() { + return None; + } let cfg = McpConfig::load().unwrap_or_else(|e| { tracing::warn!("mcp config failed to load, starting with no servers: {e:#}"); McpConfig::default() }); if cfg.servers.is_empty() { + tracing::warn!( + "{OPT_IN_ENV} is set but no mcp servers configured at \ + ~/.openab/agent/mcp.json or ./.openab/agent/mcp.json" + ); None } else { Some(McpRuntimeManager::from_config(cfg)) diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 38d03747f..98589a223 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -397,6 +397,44 @@ mod tests { } } + #[tokio::test] + async fn race_guard_no_stuck_connecting_on_concurrent_failures() { + // Two concurrent connect() tasks race against a guaranteed-failure + // server (non-existent binary). Per chaodu F4 (#959 review), the + // race guard must never leave status stuck at `Connecting` even when + // both dial attempts fail. Final status must be Failed (terminal), + // and a third connect() after the race must still be allowed to + // retry from Failed. + let json = r#"{ + "mcpServers": { + "broken": { + "type": "stdio", + "command": "/nonexistent/path/openab-mcp-race-test-zzz" + } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = std::sync::Arc::new(McpRuntimeManager::from_config(cfg)); + let a = { + let mgr = mgr.clone(); + tokio::spawn(async move { mgr.connect("broken").await }) + }; + let b = { + let mgr = mgr.clone(); + tokio::spawn(async move { mgr.connect("broken").await }) + }; + let _ = a.await.unwrap(); + let _ = b.await.unwrap(); + match &mgr.statuses().await[0].1 { + ServerStatus::Failed(_) => {} + other => panic!("expected Failed after race, got {other:?}"), + } + // From Failed, a follow-up connect() must still attempt a fresh + // dial — proves the Failed → Connecting transition isn't gated out. 
+ assert!(mgr.connect("broken").await.is_err()); + assert!(matches!(mgr.statuses().await[0].1, ServerStatus::Failed(_))); + } + #[test] fn stdio_child_env_keeps_only_baseline_plus_explicit() { let mut explicit = HashMap::new(); From b38a4c78aae351e96ea453d40e8cb1462bfa987a Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Tue, 2 Jun 2026 07:10:16 +0000 Subject: [PATCH 54/54] =?UTF-8?q?feat(openab-agent/mcp):=20F1=20discovery?= =?UTF-8?q?=20slice=20=E2=80=94=20status=20idle=20+=20system-prompt=20cata?= =?UTF-8?q?logue?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #959 chaodu F1 follow-up. The single `mcp` meta-tool (§5.1/5.2) keeps the LLM-facing surface bounded but hides configured server names from the LLM. F1 PoC reproduced the failure mode: "use mcp fs to list /workspace" → LLM calls `status` → sees `fs: disconnected` → reads as broken → refuses. Two fixes: Status semantic (meta_tool.rs): - `status_label` returns `idle` (not `disconnected`) for servers in `ServerStatus::Disconnected` with no failure history. `idle` reads as "ready, lazy-dial on first call"; `failed` keeps "tried and broke" with dial / handshake reason in `last_error`. - Updated 2 existing tests + new `status_labels_failed_servers_with_last_error`. System-prompt server catalogue (mod.rs + runtime.rs + config.rs + agent.rs): - New `mcp::format_system_prompt_appendix(manager)` emits a `## MCP tool` section: tool intro + `- **{name}** ({transport})` per configured server. OAuth servers get `requires `mcp login `` annotation. - `McpRuntimeManager` now snapshots a sorted `Arc<[CatalogEntry]>` at `from_config` time; sync `catalog()` accessor lets `build_system_prompt` read it without lock/async coordination. - `ServerConfig::requires_oauth()` helper for the catalogue. - `Agent::build_system_prompt` takes `Option<&McpRuntimeManager>`; the appendix is appended between base prompt + skills catalogue. Old static `MCP_SYSTEM_PROMPT_APPENDIX` const removed. 
- 2 agent.rs tests assert catalogue presence when manager is Some, and absence when None. Token-budget invariance preserved: section grows O(server count), not O(server count × tool count) — per-tool details stay behind `mcp(action: "list_tools", server)`. Mirrors Skills catalogue pattern. ADR §5.4.2 added covering both choices + token-budget impact. Tests: 62/62 with `--features mcp`; 25/25 default. cargo fmt + clippy clean. Co-Authored-By: Claude Opus 4.7 --- docs/adr/openab-agent-mcp.md | 12 ++++ openab-agent/src/agent.rs | 100 ++++++++++++++++++++++------ openab-agent/src/mcp/config.rs | 7 ++ openab-agent/src/mcp/meta_tool.rs | 49 +++++++++++++- openab-agent/src/mcp/mod.rs | 105 ++++++++++++++++++++++++++++++ openab-agent/src/mcp/runtime.rs | 63 ++++++++++++++++++ 6 files changed, 314 insertions(+), 22 deletions(-) diff --git a/docs/adr/openab-agent-mcp.md b/docs/adr/openab-agent-mcp.md index b0b446ee4..c90f1fa47 100644 --- a/docs/adr/openab-agent-mcp.md +++ b/docs/adr/openab-agent-mcp.md @@ -367,6 +367,18 @@ Three intentional choices that surfaced in PR #959 review (chaodu F2 / F6 / F7) 2. **Stdio child env scrubbing (F2, intentional security).** `Dial::Stdio` calls `env_clear()` and passes only the 4-var baseline allowlist (`HOME`, `PATH`, `TERM`, `USER` on Unix; Windows equivalents) plus the explicit `env:` map from `mcp.json`. Reasoning: openab-agent inherits high-value secrets from its launcher (`DISCORD_BOT_TOKEN`, `ANTHROPIC_API_KEY`, AWS credentials, GitHub tokens) and stdio MCP servers are third-party binaries with no contractual constraint on what they read from their environment. Leaking those by default is a much larger risk than the convenience of inherited proxy/locale settings. Servers that genuinely need additional env (proxy, certs, locale, provider config) declare them per-server in the config — a future `inherit_env` opt-in list is tracked as follow-up if user demand surfaces. 3. 
**Per-process shared `McpRuntimeManager` (F7).** A single manager is `Arc`-cloned across all ACP sessions of the same process. Reasoning: MCP servers are expensive to spawn (stdio child fork, HTTP handshake + OAuth) and most are pure-state read-only tools where cross-session visibility is benign. Trade-off: a `mcp connect github` in session A makes the `github` server immediately available in session B. We accept this — per-session isolation would multiply child processes and break the breaker / TTL accounting in §5.7 / §5.9. +#### 5.4.2 Discovery slice — bounded catalogue + idle semantics (F1) + +The §5.1 / §5.2 single `mcp` meta-tool minimizes the LLM-facing tool surface, but it also *hides* the configured server names from the LLM. The F1 PoC reproduced the resulting failure mode: when a user said "use mcp fs to list /workspace", the LLM called `mcp(action: "status")`, saw `fs: disconnected`, read it as "broken", and refused to retry. Two intentional choices remove that failure mode without re-flattening the tool surface: + +1. **Static server catalogue in the system prompt.** `mcp::format_system_prompt_appendix(manager)` appends a `## MCP tool` section containing the tool intro plus `- **{name}** ({transport})` per configured server (with a `requires \`mcp login \`` annotation when an `oauth` block is present). The list is built once at `Agent::new_boxed` time from a sync `manager.catalog()` snapshot frozen at `from_config`, so no async or lock coordination is needed inside `build_system_prompt`. Token-budget invariance is preserved: section size grows **O(server count)** — not O(server count × tool count) — because per-tool descriptors stay behind `mcp(action: "list_tools", server)`. The PoC measured ≤100 tokens per server-side entry under this pattern; flattening tools per-server (≈ what the multi-tool alternative in §4.1 would expose) blows that budget by ~30× for a typical 30-tool github server. 
+ + Mirror with the Skills catalogue (`skills::format_skills_prompt`): both advertise *names + headline metadata* in the always-present system prompt and force *body / contract* discovery through an explicit tool call (`mcp(action: "list_tools" | "describe_tool")` here, `read("skills/")` there). Same intent (the LLM knows the surface exists; details are lazy), same token-budget shape (linear in surface count, not in surface depth). + +2. **Status label `idle` for lazy-connect servers.** The meta-tool's `status_label` returns `idle` — not `disconnected` — when a server is in `ServerStatus::Disconnected` with no failure history. `disconnected` reads as "broken" to the LLM (PoC observation above); `idle` correctly signals "ready, will dial on first call". The genuine failure case still maps to `status: "failed"` with the dial / handshake error in `last_error`, so the LLM can distinguish "not tried yet" from "tried and broke". The system-prompt section also advertises these semantics explicitly so the LLM doesn't have to guess. + +These choices are wired into PR #959 (Phase 1) because the failure mode they fix is reachable as soon as `list_servers` and `status` ship — deferring to Phase 2/3 would mean shipping a known-broken discovery UX on the foundation slice. + ### 5.5 `rmcp` dependency & features ```toml diff --git a/openab-agent/src/agent.rs b/openab-agent/src/agent.rs index 01e4a1bb5..7a2b69c91 100644 --- a/openab-agent/src/agent.rs +++ b/openab-agent/src/agent.rs @@ -20,12 +20,12 @@ You have these tools available: Be direct and concise. Execute tasks immediately rather than explaining what you would do. When you need to understand code, read the relevant files first."#; -#[cfg(feature = "mcp")] -const MCP_SYSTEM_PROMPT_APPENDIX: &str = "\n\nAdditional tool:\n\ - - mcp: Talk to configured MCP servers. Call `mcp(action=\"list_servers\")` \ - to see what's configured, then `mcp(action=\"list_tools\", server=...)` to \ - discover per-server tools. 
Use `mcp(action=\"help\")` only if action shapes \ - are unclear."; +// The MCP system-prompt appendix is generated dynamically by +// `mcp::format_system_prompt_appendix(manager)` so the LLM sees both the +// `mcp` tool intro AND a server catalogue (PR #959 F1 discovery slice). +// Previously a static const here, but that hid the configured server names +// from the LLM and produced the "fs is disconnected, I give up" failure +// mode observed in the F1 PoC. const MAX_TOOL_LOOPS: usize = 50; /// Maximum number of messages to keep in context. When exceeded, oldest @@ -45,7 +45,11 @@ pub struct Agent { impl Agent { #[cfg(test)] pub fn new(provider: impl LlmProvider + 'static, working_dir: String) -> Self { - let system_prompt = Self::build_system_prompt(&working_dir, false); + let system_prompt = Self::build_system_prompt( + &working_dir, + #[cfg(feature = "mcp")] + None, + ); Self { provider: Box::new(provider), messages: Vec::new(), @@ -62,11 +66,11 @@ impl Agent { working_dir: String, #[cfg(feature = "mcp")] mcp_manager: Option, ) -> Self { - #[cfg(feature = "mcp")] - let has_mcp = mcp_manager.is_some(); - #[cfg(not(feature = "mcp"))] - let has_mcp = false; - let system_prompt = Self::build_system_prompt(&working_dir, has_mcp); + let system_prompt = Self::build_system_prompt( + &working_dir, + #[cfg(feature = "mcp")] + mcp_manager.as_ref(), + ); #[cfg(feature = "mcp")] let tools = { let mut t = tools::tool_definitions(); @@ -88,11 +92,18 @@ impl Agent { } } - /// Run the agent with a user prompt, executing tool calls until completion. - /// Returns the final text response. - fn build_system_prompt(working_dir: &str, mcp_enabled: bool) -> String { - #[cfg(not(feature = "mcp"))] - let _ = mcp_enabled; + /// Build the system prompt sent on every LLM call. Composition order: + /// 1. base prompt (`SYSTEM_PROMPT`, optionally prefixed by project-local + /// `AGENTS.md`), + /// 2. 
MCP appendix — tool intro + server catalogue (PR #959 F1 + /// discovery slice); only when `mcp_manager` is `Some`, + /// 3. skills catalogue. + /// + /// Built once at `Agent::new*` time and reused on every `call_llm`. + fn build_system_prompt( + working_dir: &str, + #[cfg(feature = "mcp")] mcp_manager: Option<&McpRuntimeManager>, + ) -> String { let wd = std::path::Path::new(working_dir); let agents_md = wd.join("AGENTS.md"); let custom = std::fs::read_to_string(&agents_md).unwrap_or_default(); @@ -104,8 +115,8 @@ impl Agent { }; #[cfg(feature = "mcp")] - let base = if mcp_enabled { - format!("{base}{MCP_SYSTEM_PROMPT_APPENDIX}") + let base = if let Some(mgr) = mcp_manager { + format!("{base}{}", mcp::format_system_prompt_appendix(mgr)) } else { base }; @@ -380,6 +391,57 @@ mod tests { } } + #[cfg(feature = "mcp")] + #[test] + fn build_system_prompt_includes_mcp_catalogue_when_manager_provided() { + // PR #959 F1 discovery slice: when an MCP manager is wired in, the + // system prompt must surface the configured server catalogue so the + // LLM knows `list_tools` is worth calling (the "fs disconnected, I + // give up" failure mode the static const previously caused). 
+ use crate::mcp::config::McpConfig; + let cfg: McpConfig = serde_json::from_str( + r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear" } + } + } + }"#, + ) + .unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + + let tmp = tempfile::TempDir::new().unwrap(); + let prompt = Agent::build_system_prompt(&tmp.path().to_string_lossy(), Some(&mgr)); + + assert!( + prompt.contains("## MCP tool"), + "missing MCP section:\n{prompt}" + ); + assert!( + prompt.contains("**fs** (stdio)"), + "missing fs catalogue entry:\n{prompt}" + ); + assert!( + prompt.contains("requires `mcp login linear`"), + "missing OAuth login hint:\n{prompt}" + ); + } + + #[cfg(feature = "mcp")] + #[test] + fn build_system_prompt_omits_mcp_section_when_no_manager() { + let tmp = tempfile::TempDir::new().unwrap(); + let prompt = Agent::build_system_prompt(&tmp.path().to_string_lossy(), None); + assert!( + !prompt.contains("## MCP tool"), + "MCP section leaked into prompt without manager:\n{prompt}" + ); + } + #[tokio::test] #[ignore] // Integration test: executes real file tools async fn test_agent_multiple_tool_calls() { diff --git a/openab-agent/src/mcp/config.rs b/openab-agent/src/mcp/config.rs index 95a79b817..fbb25087b 100644 --- a/openab-agent/src/mcp/config.rs +++ b/openab-agent/src/mcp/config.rs @@ -47,6 +47,13 @@ impl ServerConfig { ServerConfig::Http { .. } => "http", } } + + /// `true` when the server is HTTP with an `oauth` block — used by the + /// system-prompt catalogue (PR #959 F1 discovery slice) to hint that + /// the LLM should ask the user to run `mcp login <name>` before calling. + pub fn requires_oauth(&self) -> bool { + matches!(self, ServerConfig::Http { oauth: Some(_), .. 
}) + } } #[derive(Debug, Default, Clone, Serialize, Deserialize)] diff --git a/openab-agent/src/mcp/meta_tool.rs b/openab-agent/src/mcp/meta_tool.rs index 22aafc639..eefe5b275 100644 --- a/openab-agent/src/mcp/meta_tool.rs +++ b/openab-agent/src/mcp/meta_tool.rs @@ -186,7 +186,13 @@ async fn list_servers(manager: &McpRuntimeManager) -> Value { fn status_label(status: &ServerStatus) -> &'static str { match status { - ServerStatus::Disconnected => "disconnected", + // `Disconnected` is the cold/idle state — config loaded but the + // child process hasn't been spawned yet. Lazy connect happens on + // the first `call` / `list_tools`, so this is NOT a failure mode. + // Earlier label `"disconnected"` confused LLMs into reporting the + // server as broken on a plain `list_servers` (PR #959 F1 PoC + // observation). `"failed"` already covers the error case below. + ServerStatus::Disconnected => "idle", ServerStatus::Connecting => "connecting", ServerStatus::Connected => "connected", ServerStatus::Failed(_) => "failed", @@ -230,7 +236,7 @@ mod tests { .map(|e| (e["name"].as_str().unwrap(), e)) .collect(); assert_eq!(by_name["fs"]["transport"], "stdio"); - assert_eq!(by_name["fs"]["status"], "disconnected"); + assert_eq!(by_name["fs"]["status"], "idle"); assert_eq!(by_name["linear"]["transport"], "http"); } @@ -358,11 +364,48 @@ mod tests { let entries = result.as_array().unwrap(); assert_eq!(entries.len(), 2); for e in entries { - assert_eq!(e["status"], "disconnected"); + assert_eq!(e["status"], "idle"); assert!(e["last_error"].is_null()); } } + #[tokio::test] + async fn status_labels_failed_servers_with_last_error() { + // Status uses a `Failed` state distinct from `idle`; the LLM should + // see the failure surfaced explicitly via `status: "failed"` + + // `last_error: ` rather than collapsing into `idle`. 
+ let mgr = mgr_from( + r#"{ + "mcpServers": { + "broken": { + "type": "stdio", + "command": "/nonexistent/openab-mcp-test-stub-zzz" + } + } + }"#, + ); + // Trip the Failed state via a connect attempt that will fail at spawn. + let _ = dispatch( + &mgr, + Action::Call { + server: "broken".into(), + tool: "anything".into(), + arguments: serde_json::json!({}), + }, + ) + .await; + let result = dispatch(&mgr, Action::Status { server: None }) + .await + .unwrap(); + let entries = result.as_array().unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0]["status"], "failed"); + assert!( + !entries[0]["last_error"].is_null(), + "Failed status should carry last_error" + ); + } + #[tokio::test] async fn status_filter_by_server_returns_single_entry() { let mgr = mgr_from( diff --git a/openab-agent/src/mcp/mod.rs b/openab-agent/src/mcp/mod.rs index 36502bd40..6f1e8c514 100644 --- a/openab-agent/src/mcp/mod.rs +++ b/openab-agent/src/mcp/mod.rs @@ -104,6 +104,51 @@ pub fn load_runtime_or_warn() -> Option { } } +/// Build the MCP section appended to the system prompt at session start +/// (PR #959 chaodu F1, discovery slice). Mirrors the skills-catalogue +/// pattern: advertise *server names + transports* — not individual tools — +/// so the LLM knows the surface exists and can call +/// `mcp(action="list_tools", server=...)` to discover capabilities on demand. +/// +/// Token-budget invariance: the section grows O(server count), not +/// O(server count × tool count). PR #959 F1 PoC measured ≤100 tokens per +/// server-side meta entry under this pattern; flattening per-tool would +/// blow that invariance up. +/// +/// Status semantics worth surfacing to the LLM (matches `status_label` in +/// `meta_tool`): `idle` = ready (lazy-connect on first call), not broken. 
+pub fn format_system_prompt_appendix(manager: &McpRuntimeManager) -> String { + let catalog = manager.catalog(); + let mut out = String::from( + "\n\n## MCP tool\n\n\ + Use the `mcp` tool to talk to configured MCP servers. Key actions: \ + `list_tools(server)` discovers a server's tools, \ + `call(server, tool, arguments)` invokes one. Servers auto-connect \ + on first use — `status: \"idle\"` means ready (not broken); \ + `status: \"failed\"` carries the error reason in `last_error`. \ + Call `mcp(action=\"help\")` only if action shapes are unclear.\n\n", + ); + if catalog.is_empty() { + out.push_str( + "No MCP servers are configured. The `mcp` tool will report an \ + empty `list_servers` until one is added.\n", + ); + return out; + } + out.push_str("Configured servers:\n"); + for entry in catalog { + if entry.requires_oauth { + out.push_str(&format!( + "- **{}** ({}, requires `mcp login {}` before first call)\n", + entry.name, entry.transport, entry.name, + )); + } else { + out.push_str(&format!("- **{}** ({})\n", entry.name, entry.transport)); + } + } + out +} + /// `openab-agent mcp list [--resolve]`. 
/// /// Default: print configs verbatim (`${env:VAR}` placeholders kept as-is) so @@ -180,3 +225,63 @@ pub async fn cli_connect(name: String) { } } } + +#[cfg(test)] +mod tests { + use super::*; + use config::McpConfig; + + fn mgr_from(json: &str) -> McpRuntimeManager { + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + McpRuntimeManager::from_config(cfg) + } + + #[test] + fn format_system_prompt_appendix_lists_each_server() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, + "weather": { "type": "http", "url": "https://example/mcp" } + } + }"#, + ); + let s = format_system_prompt_appendix(&mgr); + assert!(s.contains("## MCP tool")); + assert!(s.contains("Configured servers:")); + assert!(s.contains("**fs** (stdio)")); + assert!(s.contains("**weather** (http)")); + // Status semantics must be advertised so LLM doesn't misread `idle` + // as a failure (PR #959 F1 PoC observation). + assert!(s.contains("idle")); + } + + #[test] + fn format_system_prompt_appendix_marks_oauth_servers() { + let mgr = mgr_from( + r#"{ + "mcpServers": { + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear", "scopes": ["read"] } + } + } + }"#, + ); + let s = format_system_prompt_appendix(&mgr); + assert!( + s.contains("requires `mcp login linear`"), + "OAuth servers must surface the login hint; got:\n{s}" + ); + } + + #[test] + fn format_system_prompt_appendix_handles_empty_catalog() { + let mgr = mgr_from(r#"{"mcpServers":{}}"#); + let s = format_system_prompt_appendix(&mgr); + assert!(s.contains("## MCP tool")); + assert!(s.contains("No MCP servers are configured")); + assert!(!s.contains("Configured servers:")); + } +} diff --git a/openab-agent/src/mcp/runtime.rs b/openab-agent/src/mcp/runtime.rs index 98589a223..5af3aa4a8 100644 --- a/openab-agent/src/mcp/runtime.rs +++ b/openab-agent/src/mcp/runtime.rs @@ -66,15 +66,42 @@ impl std::fmt::Debug for ServerHandle { 
} } +/// Immutable, lock-free view of a configured server for catalogue +/// advertising in the system prompt (PR #959 chaodu F1, discovery slice). +/// Lives outside the `RwLock` so `format_system_prompt_appendix` +/// can build the prompt synchronously at `Agent::new_with_provider` time +/// without coordinating with the async runtime. +#[derive(Debug, Clone)] +pub struct CatalogEntry { + pub name: String, + pub transport: &'static str, + pub requires_oauth: bool, +} + /// Owns one `ServerHandle` per configured server, behind an async `RwLock` /// so the foreground LLM path and the background eviction task can share it. #[derive(Debug, Default, Clone)] pub struct McpRuntimeManager { handles: Arc>>, + /// Sorted-by-name snapshot of static server identity (name + transport + + /// oauth-required flag). Frozen at `from_config` — never mutated, so it + /// is safe to read without locking. Used by the system-prompt catalogue + /// (PR #959 F1 discovery slice). + catalog: Arc<[CatalogEntry]>, } impl McpRuntimeManager { pub fn from_config(cfg: McpConfig) -> Self { + let mut catalog: Vec = cfg + .servers + .iter() + .map(|(name, config)| CatalogEntry { + name: name.clone(), + transport: config.transport_label(), + requires_oauth: config.requires_oauth(), + }) + .collect(); + catalog.sort_by(|a, b| a.name.cmp(&b.name)); let handles: HashMap<_, _> = cfg .servers .into_iter() @@ -90,9 +117,16 @@ impl McpRuntimeManager { .collect(); Self { handles: Arc::new(RwLock::new(handles)), + catalog: catalog.into(), } } + /// Lock-free, synchronous access to the configured-server catalogue. + /// See `CatalogEntry` for the rationale. + pub fn catalog(&self) -> &[CatalogEntry] { + &self.catalog + } + /// Snapshot of `(name, status)` sorted by name. Clones out so the read /// guard is dropped before returning — callers don't hold a lock. 
pub async fn statuses(&self) -> Vec<(String, ServerStatus)> { @@ -313,6 +347,35 @@ mod tests { let mgr = McpRuntimeManager::from_config(McpConfig::default()); assert!(mgr.is_empty().await); assert!(mgr.statuses().await.is_empty()); + assert!(mgr.catalog().is_empty()); + } + + #[test] + fn catalog_is_sorted_and_flags_oauth() { + let json = r#"{ + "mcpServers": { + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp", + "oauth": { "provider": "linear", "scopes": ["read"] } + }, + "fs": { "type": "stdio", "command": "mcp-server-filesystem" }, + "weather": { "type": "http", "url": "https://example/mcp" } + } + }"#; + let cfg: McpConfig = serde_json::from_str(json).unwrap(); + let mgr = McpRuntimeManager::from_config(cfg); + let cat = mgr.catalog(); + let names: Vec<&str> = cat.iter().map(|e| e.name.as_str()).collect(); + assert_eq!(names, vec!["fs", "linear", "weather"]); + let by_name: std::collections::HashMap<&str, &CatalogEntry> = + cat.iter().map(|e| (e.name.as_str(), e)).collect(); + assert_eq!(by_name["fs"].transport, "stdio"); + assert!(!by_name["fs"].requires_oauth); + assert_eq!(by_name["linear"].transport, "http"); + assert!(by_name["linear"].requires_oauth); + assert_eq!(by_name["weather"].transport, "http"); + assert!(!by_name["weather"].requires_oauth); } #[tokio::test]